forked from lijiext/lammps
Merge pull request #632 from timattox/USER-DPD_kokkos_merge
Add Kokkos version of the USER-DPD package
This commit is contained in:
commit
99791ce01c
|
@ -7,6 +7,7 @@
|
|||
:line
|
||||
|
||||
fix dpd/energy command :h3
|
||||
fix dpd/energy/kk command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
|
@ -46,6 +47,29 @@ examples/USER/dpd directory.
|
|||
|
||||
:line
|
||||
|
||||
Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
|
||||
functionally the same as the corresponding style without the suffix.
|
||||
They have been optimized to run faster, depending on your available
|
||||
hardware, as discussed in "Section 5"_Section_accelerate.html
|
||||
of the manual. The accelerated styles take the same arguments and
|
||||
should produce the same results, except for round-off and precision
|
||||
issues.
|
||||
|
||||
These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
|
||||
USER-OMP and OPT packages, respectively. They are only enabled if
|
||||
LAMMPS was built with those packages. See the "Making
|
||||
LAMMPS"_Section_start.html#start_3 section for more info.
|
||||
|
||||
You can specify the accelerated styles explicitly in your input script
|
||||
by including their suffix, or you can use the "-suffix command-line
|
||||
switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can
|
||||
use the "suffix"_suffix.html command in your input script.
|
||||
|
||||
See "Section 5"_Section_accelerate.html of the manual for
|
||||
more instructions on how to use the accelerated styles effectively.
|
||||
|
||||
:line
|
||||
|
||||
[Restrictions:]
|
||||
|
||||
This command is part of the USER-DPD package. It is only enabled if
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
:line
|
||||
|
||||
fix eos/table/rx command :h3
|
||||
fix eos/table/rx/kk command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
|
@ -152,6 +153,29 @@ no 0.93 0.00 0.000 -1.76 :pre
|
|||
|
||||
:line
|
||||
|
||||
Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
|
||||
functionally the same as the corresponding style without the suffix.
|
||||
They have been optimized to run faster, depending on your available
|
||||
hardware, as discussed in "Section 5"_Section_accelerate.html
|
||||
of the manual. The accelerated styles take the same arguments and
|
||||
should produce the same results, except for round-off and precision
|
||||
issues.
|
||||
|
||||
These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
|
||||
USER-OMP and OPT packages, respectively. They are only enabled if
|
||||
LAMMPS was built with those packages. See the "Making
|
||||
LAMMPS"_Section_start.html#start_3 section for more info.
|
||||
|
||||
You can specify the accelerated styles explicitly in your input script
|
||||
by including their suffix, or you can use the "-suffix command-line
|
||||
switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can
|
||||
use the "suffix"_suffix.html command in your input script.
|
||||
|
||||
See "Section 5"_Section_accelerate.html of the manual for
|
||||
more instructions on how to use the accelerated styles effectively.
|
||||
|
||||
:line
|
||||
|
||||
[Restrictions:]
|
||||
|
||||
This command is part of the USER-DPD package. It is only enabled if
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
:line
|
||||
|
||||
fix rx command :h3
|
||||
fix rx/kk command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
|
@ -182,6 +183,29 @@ read_data data.dpd fix foo_SPECIES NULL Species
|
|||
|
||||
:line
|
||||
|
||||
Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
|
||||
functionally the same as the corresponding style without the suffix.
|
||||
They have been optimized to run faster, depending on your available
|
||||
hardware, as discussed in "Section 5"_Section_accelerate.html
|
||||
of the manual. The accelerated styles take the same arguments and
|
||||
should produce the same results, except for round-off and precision
|
||||
issues.
|
||||
|
||||
These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
|
||||
USER-OMP and OPT packages, respectively. They are only enabled if
|
||||
LAMMPS was built with those packages. See the "Making
|
||||
LAMMPS"_Section_start.html#start_3 section for more info.
|
||||
|
||||
You can specify the accelerated styles explicitly in your input script
|
||||
by including their suffix, or you can use the "-suffix command-line
|
||||
switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can
|
||||
use the "suffix"_suffix.html command in your input script.
|
||||
|
||||
See "Section 5"_Section_accelerate.html of the manual for
|
||||
more instructions on how to use the accelerated styles effectively.
|
||||
|
||||
:line
|
||||
|
||||
[Restrictions:]
|
||||
|
||||
This command is part of the USER-DPD package. It is only enabled if
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
:line
|
||||
|
||||
fix shardlow command :h3
|
||||
fix shardlow/kk command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
|
@ -52,6 +53,29 @@ examples/USER/dpd directory.
|
|||
|
||||
:line
|
||||
|
||||
Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
|
||||
functionally the same as the corresponding style without the suffix.
|
||||
They have been optimized to run faster, depending on your available
|
||||
hardware, as discussed in "Section 5"_Section_accelerate.html
|
||||
of the manual. The accelerated styles take the same arguments and
|
||||
should produce the same results, except for round-off and precision
|
||||
issues.
|
||||
|
||||
These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
|
||||
USER-OMP and OPT packages, respectively. They are only enabled if
|
||||
LAMMPS was built with those packages. See the "Making
|
||||
LAMMPS"_Section_start.html#start_3 section for more info.
|
||||
|
||||
You can specify the accelerated styles explicitly in your input script
|
||||
by including their suffix, or you can use the "-suffix command-line
|
||||
switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can
|
||||
use the "suffix"_suffix.html command in your input script.
|
||||
|
||||
See "Section 5"_Section_accelerate.html of the manual for
|
||||
more instructions on how to use the accelerated styles effectively.
|
||||
|
||||
:line
|
||||
|
||||
[Restrictions:]
|
||||
|
||||
This command is part of the USER-DPD package. It is only enabled if
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
:line
|
||||
|
||||
fix wall/lj93 command :h3
|
||||
fix wall/lj93/kk command :h3
|
||||
fix wall/lj126 command :h3
|
||||
fix wall/lj1043 command :h3
|
||||
fix wall/colloid command :h3
|
||||
|
@ -277,6 +278,31 @@ the total potential energy of the system (the quantity being
|
|||
minimized), you MUST enable the "fix_modify"_fix_modify.html {energy}
|
||||
option for this fix.
|
||||
|
||||
:line
|
||||
|
||||
Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
|
||||
functionally the same as the corresponding style without the suffix.
|
||||
They have been optimized to run faster, depending on your available
|
||||
hardware, as discussed in "Section 5"_Section_accelerate.html
|
||||
of the manual. The accelerated styles take the same arguments and
|
||||
should produce the same results, except for round-off and precision
|
||||
issues.
|
||||
|
||||
These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
|
||||
USER-OMP and OPT packages, respectively. They are only enabled if
|
||||
LAMMPS was built with those packages. See the "Making
|
||||
LAMMPS"_Section_start.html#start_3 section for more info.
|
||||
|
||||
You can specify the accelerated styles explicitly in your input script
|
||||
by including their suffix, or you can use the "-suffix command-line
|
||||
switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can
|
||||
use the "suffix"_suffix.html command in your input script.
|
||||
|
||||
See "Section 5"_Section_accelerate.html of the manual for
|
||||
more instructions on how to use the accelerated styles effectively.
|
||||
|
||||
:line
|
||||
|
||||
[Restrictions:] none
|
||||
|
||||
[Related commands:]
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
pair_style dpd/fdt command :h3
|
||||
pair_style dpd/fdt/energy command :h3
|
||||
pair_style dpd/fdt/energy/kk command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
|
@ -125,6 +126,29 @@ significantly larger timesteps to be taken.
|
|||
|
||||
:line
|
||||
|
||||
Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
|
||||
functionally the same as the corresponding style without the suffix.
|
||||
They have been optimized to run faster, depending on your available
|
||||
hardware, as discussed in "Section 5"_Section_accelerate.html
|
||||
of the manual. The accelerated styles take the same arguments and
|
||||
should produce the same results, except for round-off and precision
|
||||
issues.
|
||||
|
||||
These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
|
||||
USER-OMP and OPT packages, respectively. They are only enabled if
|
||||
LAMMPS was built with those packages. See the "Making
|
||||
LAMMPS"_Section_start.html#start_3 section for more info.
|
||||
|
||||
You can specify the accelerated styles explicitly in your input script
|
||||
by including their suffix, or you can use the "-suffix command-line
|
||||
switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can
|
||||
use the "suffix"_suffix.html command in your input script.
|
||||
|
||||
See "Section 5"_Section_accelerate.html of the manual for
|
||||
more instructions on how to use the accelerated styles effectively.
|
||||
|
||||
:line
|
||||
|
||||
[Restrictions:]
|
||||
|
||||
These commands are part of the USER-DPD package. They are only
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
:line
|
||||
|
||||
pair_style exp6/rx command :h3
|
||||
pair_style exp6/rx/kk command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
|
@ -147,6 +148,31 @@ This style does not support the pair_modify tail option for adding long-range
|
|||
tail corrections to energy and pressure for the A,C terms in the
|
||||
pair interaction.
|
||||
|
||||
:line
|
||||
|
||||
Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
|
||||
functionally the same as the corresponding style without the suffix.
|
||||
They have been optimized to run faster, depending on your available
|
||||
hardware, as discussed in "Section 5"_Section_accelerate.html
|
||||
of the manual. The accelerated styles take the same arguments and
|
||||
should produce the same results, except for round-off and precision
|
||||
issues.
|
||||
|
||||
These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
|
||||
USER-OMP and OPT packages, respectively. They are only enabled if
|
||||
LAMMPS was built with those packages. See the "Making
|
||||
LAMMPS"_Section_start.html#start_3 section for more info.
|
||||
|
||||
You can specify the accelerated styles explicitly in your input script
|
||||
by including their suffix, or you can use the "-suffix command-line
|
||||
switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can
|
||||
use the "suffix"_suffix.html command in your input script.
|
||||
|
||||
See "Section 5"_Section_accelerate.html of the manual for
|
||||
more instructions on how to use the accelerated styles effectively.
|
||||
|
||||
:line
|
||||
|
||||
[Restrictions:]
|
||||
|
||||
This command is part of the USER-DPD package. It is only enabled if
|
||||
|
|
|
@ -10,6 +10,7 @@ pair_style hybrid command :h3
|
|||
pair_style hybrid/omp command :h3
|
||||
pair_style hybrid/overlay command :h3
|
||||
pair_style hybrid/overlay/omp command :h3
|
||||
pair_style hybrid/overlay/kk command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
:line
|
||||
|
||||
pair_style multi/lucy/rx command :h3
|
||||
pair_style multi/lucy/rx/kk command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
|
@ -200,6 +201,29 @@ This pair style can only be used via the {pair} keyword of the
|
|||
|
||||
:line
|
||||
|
||||
Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
|
||||
functionally the same as the corresponding style without the suffix.
|
||||
They have been optimized to run faster, depending on your available
|
||||
hardware, as discussed in "Section 5"_Section_accelerate.html
|
||||
of the manual. The accelerated styles take the same arguments and
|
||||
should produce the same results, except for round-off and precision
|
||||
issues.
|
||||
|
||||
These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
|
||||
USER-OMP and OPT packages, respectively. They are only enabled if
|
||||
LAMMPS was built with those packages. See the "Making
|
||||
LAMMPS"_Section_start.html#start_3 section for more info.
|
||||
|
||||
You can specify the accelerated styles explicitly in your input script
|
||||
by including their suffix, or you can use the "-suffix command-line
|
||||
switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can
|
||||
use the "suffix"_suffix.html command in your input script.
|
||||
|
||||
See "Section 5"_Section_accelerate.html of the manual for
|
||||
more instructions on how to use the accelerated styles effectively.
|
||||
|
||||
:line
|
||||
|
||||
[Restrictions:]
|
||||
|
||||
This command is part of the USER-DPD package. It is only enabled if
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
:line
|
||||
|
||||
pair_style table/rx command :h3
|
||||
pair_style table/rx/kk command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
|
@ -223,6 +224,29 @@ This pair style can only be used via the {pair} keyword of the
|
|||
|
||||
:line
|
||||
|
||||
Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
|
||||
functionally the same as the corresponding style without the suffix.
|
||||
They have been optimized to run faster, depending on your available
|
||||
hardware, as discussed in "Section 5"_Section_accelerate.html
|
||||
of the manual. The accelerated styles take the same arguments and
|
||||
should produce the same results, except for round-off and precision
|
||||
issues.
|
||||
|
||||
These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
|
||||
USER-OMP and OPT packages, respectively. They are only enabled if
|
||||
LAMMPS was built with those packages. See the "Making
|
||||
LAMMPS"_Section_start.html#start_3 section for more info.
|
||||
|
||||
You can specify the accelerated styles explicitly in your input script
|
||||
by including their suffix, or you can use the "-suffix command-line
|
||||
switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can
|
||||
use the "suffix"_suffix.html command in your input script.
|
||||
|
||||
See "Section 5"_Section_accelerate.html of the manual for
|
||||
more instructions on how to use the accelerated styles effectively.
|
||||
|
||||
:line
|
||||
|
||||
[Restrictions:]
|
||||
|
||||
This command is part of the USER-DPD package. It is only enabled if
|
||||
|
|
|
@ -755,6 +755,12 @@ namespace Kokkos {
|
|||
return Random_XorShift64<DeviceType>(state_(i),i);
|
||||
}
|
||||
|
||||
// NOTE: state_idx MUST be unique and less than num_states
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Random_XorShift64<DeviceType> get_state(const int state_idx) const {
|
||||
return Random_XorShift64<DeviceType>(state_(state_idx),state_idx);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void free_state(const Random_XorShift64<DeviceType>& state) const {
|
||||
state_(state.state_idx_) = state.state_;
|
||||
|
@ -1010,6 +1016,12 @@ namespace Kokkos {
|
|||
return Random_XorShift1024<DeviceType>(state_,p_(i),i);
|
||||
};
|
||||
|
||||
// NOTE: state_idx MUST be unique and less than num_states
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Random_XorShift1024<DeviceType> get_state(const int state_idx) const {
|
||||
return Random_XorShift1024<DeviceType>(state_,p_(state_idx),state_idx);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void free_state(const Random_XorShift1024<DeviceType>& state) const {
|
||||
for(int i = 0; i<16; i++)
|
||||
|
@ -1208,8 +1220,8 @@ Random_XorShift64<Kokkos::Cuda> Random_XorShift64_Pool<Kokkos::Cuda>::get_state(
|
|||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void Random_XorShift64_Pool<Kokkos::Cuda>::free_state(const Random_XorShift64<Kokkos::Cuda> &state) const {
|
||||
#ifdef __CUDA_ARCH__
|
||||
state_(state.state_idx_) = state.state_;
|
||||
#ifdef __CUDA_ARCH__
|
||||
locks_(state.state_idx_) = 0;
|
||||
return;
|
||||
#endif
|
||||
|
@ -1244,9 +1256,9 @@ Random_XorShift1024<Kokkos::Cuda> Random_XorShift1024_Pool<Kokkos::Cuda>::get_st
|
|||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void Random_XorShift1024_Pool<Kokkos::Cuda>::free_state(const Random_XorShift1024<Kokkos::Cuda> &state) const {
|
||||
#ifdef __CUDA_ARCH__
|
||||
for(int i=0; i<16; i++)
|
||||
state_(state.state_idx_,i) = state.state_[i];
|
||||
#ifdef __CUDA_ARCH__
|
||||
locks_(state.state_idx_) = 0;
|
||||
return;
|
||||
#endif
|
||||
|
|
|
@ -115,6 +115,10 @@ if (test $1 = "USER-CGSDK") then
|
|||
depend USER-OMP
|
||||
fi
|
||||
|
||||
if (test $1 = "USER-DPD") then
|
||||
depend KOKKOS
|
||||
fi
|
||||
|
||||
if (test $1 = "USER-FEP") then
|
||||
depend USER-OMP
|
||||
fi
|
||||
|
|
|
@ -49,8 +49,12 @@ action atom_vec_bond_kokkos.cpp atom_vec_bond.cpp
|
|||
action atom_vec_bond_kokkos.h atom_vec_bond.h
|
||||
action atom_vec_charge_kokkos.cpp
|
||||
action atom_vec_charge_kokkos.h
|
||||
action atom_vec_dpd_kokkos.cpp atom_vec_dpd.cpp
|
||||
action atom_vec_dpd_kokkos.h atom_vec_dpd.h
|
||||
action atom_vec_full_kokkos.cpp atom_vec_full.cpp
|
||||
action atom_vec_full_kokkos.h atom_vec_full.h
|
||||
action atom_vec_hybrid_kokkos.cpp
|
||||
action atom_vec_hybrid_kokkos.h
|
||||
action atom_vec_kokkos.cpp
|
||||
action atom_vec_kokkos.h
|
||||
action atom_vec_molecular_kokkos.cpp atom_vec_molecular.cpp
|
||||
|
@ -77,6 +81,8 @@ action domain_kokkos.cpp
|
|||
action domain_kokkos.h
|
||||
action fix_deform_kokkos.cpp
|
||||
action fix_deform_kokkos.h
|
||||
action fix_eos_table_rx_kokkos.cpp fix_eos_table_rx.cpp
|
||||
action fix_eos_table_rx_kokkos.h fix_eos_table_rx.h
|
||||
action fix_langevin_kokkos.cpp
|
||||
action fix_langevin_kokkos.h
|
||||
action fix_nh_kokkos.cpp
|
||||
|
@ -89,6 +95,8 @@ action fix_nve_kokkos.cpp
|
|||
action fix_nve_kokkos.h
|
||||
action fix_nvt_kokkos.cpp
|
||||
action fix_nvt_kokkos.h
|
||||
action fix_property_atom_kokkos.cpp
|
||||
action fix_property_atom_kokkos.h
|
||||
action fix_qeq_reax_kokkos.cpp fix_qeq_reax.cpp
|
||||
action fix_qeq_reax_kokkos.h fix_qeq_reax.h
|
||||
action fix_reaxc_bonds_kokkos.cpp fix_reaxc_bonds.cpp
|
||||
|
@ -97,10 +105,18 @@ action fix_reaxc_species_kokkos.cpp fix_reaxc_species.cpp
|
|||
action fix_reaxc_species_kokkos.h fix_reaxc_species.h
|
||||
action fix_setforce_kokkos.cpp
|
||||
action fix_setforce_kokkos.h
|
||||
action fix_shardlow_kokkos.cpp fix_shardlow.cpp
|
||||
action fix_shardlow_kokkos.h fix_shardlow.h
|
||||
action fix_momentum_kokkos.cpp
|
||||
action fix_momentum_kokkos.h
|
||||
action fix_wall_lj93_kokkos.cpp
|
||||
action fix_wall_lj93_kokkos.h
|
||||
action fix_wall_reflect_kokkos.cpp
|
||||
action fix_wall_reflect_kokkos.h
|
||||
action fix_dpd_energy_kokkos.cpp fix_dpd_energy.cpp
|
||||
action fix_dpd_energy_kokkos.h fix_dpd_energy.h
|
||||
action fix_rx_kokkos.cpp fix_rx.cpp
|
||||
action fix_rx_kokkos.h fix_rx.h
|
||||
action gridcomm_kokkos.cpp gridcomm.cpp
|
||||
action gridcomm_kokkos.h gridcomm.h
|
||||
action improper_class2_kokkos.cpp improper_class2.cpp
|
||||
|
@ -124,8 +140,12 @@ action npair_copy_kokkos.cpp
|
|||
action npair_copy_kokkos.h
|
||||
action npair_kokkos.cpp
|
||||
action npair_kokkos.h
|
||||
action npair_ssa_kokkos.cpp npair_half_bin_newton_ssa.cpp
|
||||
action npair_ssa_kokkos.h npair_half_bin_newton_ssa.h
|
||||
action nbin_kokkos.cpp
|
||||
action nbin_kokkos.h
|
||||
action nbin_ssa_kokkos.cpp nbin_ssa.cpp
|
||||
action nbin_ssa_kokkos.h nbin_ssa.h
|
||||
action math_special_kokkos.cpp
|
||||
action math_special_kokkos.h
|
||||
action pair_buck_coul_cut_kokkos.cpp
|
||||
|
@ -144,12 +164,20 @@ action pair_coul_long_kokkos.cpp pair_coul_long.cpp
|
|||
action pair_coul_long_kokkos.h pair_coul_long.h
|
||||
action pair_coul_wolf_kokkos.cpp
|
||||
action pair_coul_wolf_kokkos.h
|
||||
action pair_dpd_fdt_energy_kokkos.cpp pair_dpd_fdt_energy.cpp
|
||||
action pair_dpd_fdt_energy_kokkos.h pair_dpd_fdt_energy.h
|
||||
action pair_eam_kokkos.cpp pair_eam.cpp
|
||||
action pair_eam_kokkos.h pair_eam.h
|
||||
action pair_eam_alloy_kokkos.cpp pair_eam_alloy.cpp
|
||||
action pair_eam_alloy_kokkos.h pair_eam_alloy.h
|
||||
action pair_eam_fs_kokkos.cpp pair_eam_fs.cpp
|
||||
action pair_eam_fs_kokkos.h pair_eam_fs.h
|
||||
action pair_exp6_rx_kokkos.cpp pair_exp6_rx.cpp
|
||||
action pair_exp6_rx_kokkos.h pair_exp6_rx.h
|
||||
action pair_hybrid_kokkos.cpp
|
||||
action pair_hybrid_kokkos.h
|
||||
action pair_hybrid_overlay_kokkos.cpp
|
||||
action pair_hybrid_overlay_kokkos.h
|
||||
action pair_kokkos.h
|
||||
action pair_lj_charmm_coul_charmm_implicit_kokkos.cpp pair_lj_charmm_coul_charmm_implicit.cpp
|
||||
action pair_lj_charmm_coul_charmm_implicit_kokkos.h pair_lj_charmm_coul_charmm_implicit.h
|
||||
|
@ -183,6 +211,8 @@ action pair_lj_sdk_kokkos.cpp pair_lj_sdk.cpp
|
|||
action pair_lj_sdk_kokkos.h pair_lj_sdk.h
|
||||
action pair_morse_kokkos.cpp
|
||||
action pair_morse_kokkos.h
|
||||
action pair_multi_lucy_rx_kokkos.cpp pair_multi_lucy_rx.cpp
|
||||
action pair_multi_lucy_rx_kokkos.h pair_multi_lucy_rx.h
|
||||
action pair_reaxc_kokkos.cpp pair_reaxc.cpp
|
||||
action pair_reaxc_kokkos.h pair_reaxc.h
|
||||
action pair_sw_kokkos.cpp pair_sw.cpp
|
||||
|
@ -191,6 +221,8 @@ action pair_vashishta_kokkos.cpp pair_vashishta.cpp
|
|||
action pair_vashishta_kokkos.h pair_vashishta.h
|
||||
action pair_table_kokkos.cpp
|
||||
action pair_table_kokkos.h
|
||||
action pair_table_rx_kokkos.cpp pair_table_rx.cpp
|
||||
action pair_table_rx_kokkos.h pair_table_rx.h
|
||||
action pair_tersoff_kokkos.cpp pair_tersoff.cpp
|
||||
action pair_tersoff_kokkos.h pair_tersoff.h
|
||||
action pair_tersoff_mod_kokkos.cpp pair_tersoff_mod.cpp
|
||||
|
@ -199,6 +231,8 @@ action pair_tersoff_zbl_kokkos.cpp pair_tersoff_zbl.cpp
|
|||
action pair_tersoff_zbl_kokkos.h pair_tersoff_zbl.h
|
||||
action pppm_kokkos.cpp pppm.cpp
|
||||
action pppm_kokkos.h pppm.h
|
||||
action rand_pool_wrap_kokkos.cpp
|
||||
action rand_pool_wrap_kokkos.h
|
||||
action region_block_kokkos.cpp
|
||||
action region_block_kokkos.h
|
||||
action verlet_kokkos.cpp
|
||||
|
|
|
@ -49,6 +49,7 @@ AtomKokkos::~AtomKokkos()
|
|||
memory->destroy_kokkos(k_radius, radius);
|
||||
memory->destroy_kokkos(k_rmass, rmass);
|
||||
memory->destroy_kokkos(k_omega, omega);
|
||||
memory->destroy_kokkos(k_angmom, angmom);
|
||||
memory->destroy_kokkos(k_torque, torque);
|
||||
|
||||
memory->destroy_kokkos(k_nspecial, nspecial);
|
||||
|
@ -73,6 +74,19 @@ AtomKokkos::~AtomKokkos()
|
|||
memory->destroy_kokkos(k_improper_atom2, improper_atom2);
|
||||
memory->destroy_kokkos(k_improper_atom3, improper_atom3);
|
||||
memory->destroy_kokkos(k_improper_atom4, improper_atom4);
|
||||
|
||||
// USER-DPD package
|
||||
memory->destroy_kokkos(k_uCond,uCond);
|
||||
memory->destroy_kokkos(k_uMech,uMech);
|
||||
memory->destroy_kokkos(k_uChem,uChem);
|
||||
memory->destroy_kokkos(k_uCG,uCG);
|
||||
memory->destroy_kokkos(k_uCGnew,uCGnew);
|
||||
memory->destroy_kokkos(k_rho,rho);
|
||||
memory->destroy_kokkos(k_dpdTheta,dpdTheta);
|
||||
memory->destroy_kokkos(k_duChem,duChem);
|
||||
|
||||
memory->destroy_kokkos(k_dvector,dvector);
|
||||
dvector = NULL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -227,6 +241,63 @@ void AtomKokkos::grow(unsigned int mask){
|
|||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
add a custom variable with name of type flag = 0/1 for int/double
|
||||
assumes name does not already exist
|
||||
return index in ivector or dvector of its location
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int AtomKokkos::add_custom(const char *name, int flag)
|
||||
{
|
||||
int index;
|
||||
|
||||
if (flag == 0) {
|
||||
index = nivector;
|
||||
nivector++;
|
||||
iname = (char **) memory->srealloc(iname,nivector*sizeof(char *),
|
||||
"atom:iname");
|
||||
int n = strlen(name) + 1;
|
||||
iname[index] = new char[n];
|
||||
strcpy(iname[index],name);
|
||||
ivector = (int **) memory->srealloc(ivector,nivector*sizeof(int *),
|
||||
"atom:ivector");
|
||||
memory->create(ivector[index],nmax,"atom:ivector");
|
||||
} else {
|
||||
index = ndvector;
|
||||
ndvector++;
|
||||
dname = (char **) memory->srealloc(dname,ndvector*sizeof(char *),
|
||||
"atom:dname");
|
||||
int n = strlen(name) + 1;
|
||||
dname[index] = new char[n];
|
||||
strcpy(dname[index],name);
|
||||
memory->grow_kokkos(k_dvector,dvector,ndvector,nmax,
|
||||
"atom:dvector");
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remove a custom variable of type flag = 0/1 for int/double at index
|
||||
free memory for vector and name and set ptrs to NULL
|
||||
ivector/dvector and iname/dname lists never shrink
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomKokkos::remove_custom(int flag, int index)
|
||||
{
|
||||
if (flag == 0) {
|
||||
memory->destroy(ivector[index]);
|
||||
ivector[index] = NULL;
|
||||
delete [] iname[index];
|
||||
iname[index] = NULL;
|
||||
} else {
|
||||
//memory->destroy_kokkos(dvector);
|
||||
dvector[index] = NULL;
|
||||
delete [] dname[index];
|
||||
dname[index] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomKokkos::deallocate_topology()
|
||||
|
|
|
@ -34,6 +34,7 @@ class AtomKokkos : public Atom {
|
|||
DAT::tdual_float_1d k_radius;
|
||||
DAT::tdual_float_1d k_rmass;
|
||||
DAT::tdual_v_array k_omega;
|
||||
DAT::tdual_v_array k_angmom;
|
||||
DAT::tdual_f_array k_torque;
|
||||
DAT::tdual_tagint_1d k_molecule;
|
||||
DAT::tdual_int_2d k_nspecial;
|
||||
|
@ -51,6 +52,14 @@ class AtomKokkos : public Atom {
|
|||
DAT::tdual_int_2d k_improper_type;
|
||||
DAT::tdual_tagint_2d k_improper_atom1, k_improper_atom2, k_improper_atom3, k_improper_atom4;
|
||||
|
||||
DAT::tdual_float_2d k_dvector;
|
||||
|
||||
|
||||
// USER-DPD package
|
||||
DAT::tdual_efloat_1d k_uCond, k_uMech, k_uChem, k_uCG, k_uCGnew,
|
||||
k_rho,k_dpdTheta,k_duChem;
|
||||
|
||||
|
||||
AtomKokkos(class LAMMPS *);
|
||||
~AtomKokkos();
|
||||
|
||||
|
@ -60,6 +69,8 @@ class AtomKokkos : public Atom {
|
|||
void sync_overlapping_device(const ExecutionSpace space, unsigned int mask);
|
||||
virtual void sort();
|
||||
virtual void grow(unsigned int mask);
|
||||
int add_custom(const char *, int);
|
||||
void remove_custom(int, int);
|
||||
virtual void deallocate_topology();
|
||||
void sync_modify(ExecutionSpace, unsigned int, unsigned int);
|
||||
private:
|
||||
|
|
|
@ -308,7 +308,6 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
if(pbc_flag) {
|
||||
|
@ -336,7 +335,6 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
|
||||
return n*size_forward;
|
||||
|
@ -430,7 +428,6 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
|
@ -463,7 +460,6 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
return n*3;
|
||||
}
|
||||
|
@ -501,13 +497,11 @@ void AtomVecAngleKokkos::unpack_comm_kokkos(const int &n, const int &first,
|
|||
modified(Host,X_MASK);
|
||||
struct AtomVecAngleKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
struct AtomVecAngleKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -753,13 +747,11 @@ int AtomVecAngleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
|||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
AtomVecAngleKokkos_PackBorder<LMPDeviceType,1> f(
|
||||
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
|
||||
iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
|
||||
} else {
|
||||
|
@ -769,13 +761,11 @@ int AtomVecAngleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
|||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
AtomVecAngleKokkos_PackBorder<LMPDeviceType,0> f(
|
||||
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
|
||||
iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
}
|
||||
return n*size_border;
|
||||
|
@ -977,12 +967,10 @@ void AtomVecAngleKokkos::unpack_border_kokkos(const int &n, const int &first,
|
|||
struct AtomVecAngleKokkos_UnpackBorder<LMPHostType>
|
||||
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
struct AtomVecAngleKokkos_UnpackBorder<LMPDeviceType>
|
||||
f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_molecule,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1241,13 +1229,11 @@ int AtomVecAngleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_
|
|||
AtomVecAngleKokkos_PackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
LMPHostType::fence();
|
||||
return nsend*elements;
|
||||
} else {
|
||||
AtomVecAngleKokkos_PackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
LMPDeviceType::fence();
|
||||
return nsend*elements;
|
||||
}
|
||||
}
|
||||
|
@ -1405,7 +1391,6 @@ int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int n
|
|||
AtomVecAngleKokkos_UnpackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/elements,f);
|
||||
LMPHostType::fence();
|
||||
return k_count.h_view(0);
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
|
@ -1414,7 +1399,6 @@ int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int n
|
|||
AtomVecAngleKokkos_UnpackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/elements,f);
|
||||
LMPDeviceType::fence();
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
|
||||
|
|
|
@ -224,7 +224,6 @@ int AtomVecAtomicKokkos::pack_comm_kokkos(const int &n,
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
if(pbc_flag) {
|
||||
|
@ -252,7 +251,6 @@ int AtomVecAtomicKokkos::pack_comm_kokkos(const int &n,
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
|
||||
return n*size_forward;
|
||||
|
@ -340,7 +338,6 @@ int AtomVecAtomicKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &l
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
|
@ -369,7 +366,6 @@ int AtomVecAtomicKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &l
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
return n*3;
|
||||
}
|
||||
|
@ -407,13 +403,11 @@ void AtomVecAtomicKokkos::unpack_comm_kokkos(const int &n, const int &first,
|
|||
modified(Host,X_MASK);
|
||||
struct AtomVecAtomicKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
struct AtomVecAtomicKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -655,13 +649,11 @@ int AtomVecAtomicKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
|||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
AtomVecAtomicKokkos_PackBorder<LMPDeviceType,1> f(
|
||||
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
|
||||
iswap,d_x,d_tag,d_type,d_mask,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
|
||||
} else {
|
||||
|
@ -671,13 +663,11 @@ int AtomVecAtomicKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
|||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
AtomVecAtomicKokkos_PackBorder<LMPDeviceType,0> f(
|
||||
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
|
||||
iswap,d_x,d_tag,d_type,d_mask,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
}
|
||||
return n*6;
|
||||
|
@ -853,11 +843,9 @@ void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first,
|
|||
if(space==Host) {
|
||||
struct AtomVecAtomicKokkos_UnpackBorder<LMPHostType> f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
struct AtomVecAtomicKokkos_UnpackBorder<LMPDeviceType> f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1009,12 +997,10 @@ int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat
|
|||
if(space == Host) {
|
||||
AtomVecAtomicKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
LMPHostType::fence();
|
||||
return nsend*11;
|
||||
} else {
|
||||
AtomVecAtomicKokkos_PackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
LMPDeviceType::fence();
|
||||
return nsend*11;
|
||||
}
|
||||
}
|
||||
|
@ -1106,7 +1092,6 @@ int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int
|
|||
k_count.h_view(0) = nlocal;
|
||||
AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/11,f);
|
||||
LMPHostType::fence();
|
||||
return k_count.h_view(0);
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
|
@ -1114,7 +1099,6 @@ int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int
|
|||
k_count.sync<LMPDeviceType>();
|
||||
AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/11,f);
|
||||
LMPDeviceType::fence();
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
|
||||
|
|
|
@ -266,7 +266,6 @@ int AtomVecBondKokkos::pack_comm_kokkos(const int &n,
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
if(pbc_flag) {
|
||||
|
@ -294,7 +293,6 @@ int AtomVecBondKokkos::pack_comm_kokkos(const int &n,
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
|
||||
return n*size_forward;
|
||||
|
@ -382,7 +380,6 @@ int AtomVecBondKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &lis
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
|
@ -411,7 +408,6 @@ int AtomVecBondKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &lis
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
return n*3;
|
||||
}
|
||||
|
@ -449,13 +445,11 @@ void AtomVecBondKokkos::unpack_comm_kokkos(const int &n, const int &first,
|
|||
modified(Host,X_MASK);
|
||||
struct AtomVecBondKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
struct AtomVecBondKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -701,13 +695,11 @@ int AtomVecBondKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
|||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
AtomVecBondKokkos_PackBorder<LMPDeviceType,1> f(
|
||||
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
|
||||
iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
|
||||
} else {
|
||||
|
@ -717,13 +709,11 @@ int AtomVecBondKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
|||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
AtomVecBondKokkos_PackBorder<LMPDeviceType,0> f(
|
||||
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
|
||||
iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
}
|
||||
return n*size_border;
|
||||
|
@ -925,12 +915,10 @@ void AtomVecBondKokkos::unpack_border_kokkos(const int &n, const int &first,
|
|||
struct AtomVecBondKokkos_UnpackBorder<LMPHostType>
|
||||
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
struct AtomVecBondKokkos_UnpackBorder<LMPDeviceType>
|
||||
f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_molecule,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1157,13 +1145,11 @@ int AtomVecBondKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2
|
|||
AtomVecBondKokkos_PackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
LMPHostType::fence();
|
||||
return nsend*elements;
|
||||
} else {
|
||||
AtomVecBondKokkos_PackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
LMPDeviceType::fence();
|
||||
return nsend*elements;
|
||||
}
|
||||
}
|
||||
|
@ -1299,7 +1285,6 @@ int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr
|
|||
AtomVecBondKokkos_UnpackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/elements,f);
|
||||
LMPHostType::fence();
|
||||
return k_count.h_view(0);
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
|
@ -1308,7 +1293,6 @@ int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr
|
|||
AtomVecBondKokkos_UnpackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/elements,f);
|
||||
LMPDeviceType::fence();
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
|
||||
|
|
|
@ -236,7 +236,6 @@ int AtomVecChargeKokkos::pack_comm_kokkos(const int &n,
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
if(pbc_flag) {
|
||||
|
@ -264,7 +263,6 @@ int AtomVecChargeKokkos::pack_comm_kokkos(const int &n,
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
|
||||
return n*size_forward;
|
||||
|
@ -352,7 +350,6 @@ int AtomVecChargeKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &l
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
|
@ -381,7 +378,6 @@ int AtomVecChargeKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &l
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
return n*3;
|
||||
}
|
||||
|
@ -419,13 +415,11 @@ void AtomVecChargeKokkos::unpack_comm_kokkos(const int &n, const int &first,
|
|||
modified(Host,X_MASK);
|
||||
struct AtomVecChargeKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
struct AtomVecChargeKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -669,13 +663,11 @@ int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
|||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
AtomVecChargeKokkos_PackBorder<LMPDeviceType,1> f(
|
||||
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
|
||||
iswap,d_x,d_tag,d_type,d_mask,d_q,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
|
||||
} else {
|
||||
|
@ -685,13 +677,11 @@ int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
|||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
AtomVecChargeKokkos_PackBorder<LMPDeviceType,0> f(
|
||||
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
|
||||
iswap,d_x,d_tag,d_type,d_mask,d_q,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
}
|
||||
return n*size_border;
|
||||
|
@ -890,12 +880,10 @@ void AtomVecChargeKokkos::unpack_border_kokkos(const int &n, const int &first,
|
|||
struct AtomVecChargeKokkos_UnpackBorder<LMPHostType>
|
||||
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_q,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
struct AtomVecChargeKokkos_UnpackBorder<LMPDeviceType>
|
||||
f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_q,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
|
||||
}
|
||||
|
@ -1078,13 +1066,11 @@ int AtomVecChargeKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat
|
|||
AtomVecChargeKokkos_PackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
LMPHostType::fence();
|
||||
return nsend*12;
|
||||
} else {
|
||||
AtomVecChargeKokkos_PackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
LMPDeviceType::fence();
|
||||
return nsend*12;
|
||||
}
|
||||
}
|
||||
|
@ -1181,7 +1167,6 @@ int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int
|
|||
k_count.h_view(0) = nlocal;
|
||||
AtomVecChargeKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/12,f);
|
||||
LMPHostType::fence();
|
||||
return k_count.h_view(0);
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
|
@ -1190,7 +1175,6 @@ int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int
|
|||
AtomVecChargeKokkos_UnpackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/12,f);
|
||||
LMPDeviceType::fence();
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,137 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef ATOM_CLASS
|
||||
|
||||
AtomStyle(dpd/kk,AtomVecDPDKokkos)
|
||||
AtomStyle(dpd/kk/device,AtomVecDPDKokkos)
|
||||
AtomStyle(dpd/kk/host,AtomVecDPDKokkos)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_ATOM_VEC_DPD_KOKKOS_H
|
||||
#define LMP_ATOM_VEC_DPD_KOKKOS_H
|
||||
|
||||
#include "atom_vec_kokkos.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// Kokkos variant of the DPD atom style (registered above under the names
// dpd/kk, dpd/kk/device and dpd/kk/host). Derives from AtomVecKokkos and
// declares, next to the usual per-atom fields, the DPD-specific per-atom
// arrays uCond, uMech, uChem, uCG, uCGnew, rho, dpdTheta and duChem.
class AtomVecDPDKokkos : public AtomVecKokkos {
|
||||
public:
|
||||
AtomVecDPDKokkos(class LAMMPS *);
|
||||
virtual ~AtomVecDPDKokkos() {}
|
||||
// --- Raw-buffer interface: these routines take plain int*/double* buffers.
// The *_hybrid variants presumably serve atom style hybrid -- TODO confirm
// against the .cpp.
void grow(int);
|
||||
void copy(int, int, int);
|
||||
int pack_comm(int, int *, double *, int, int *);
|
||||
int pack_comm_vel(int, int *, double *, int, int *);
|
||||
int pack_comm_hybrid(int, int *, double *);
|
||||
void unpack_comm(int, int, double *);
|
||||
void unpack_comm_vel(int, int, double *);
|
||||
int unpack_comm_hybrid(int, int, double *);
|
||||
int pack_reverse(int, int, double *);
|
||||
void unpack_reverse(int, int *, double *);
|
||||
int pack_border(int, int *, double *, int, int *);
|
||||
int pack_border_vel(int, int *, double *, int, int *);
|
||||
int pack_border_hybrid(int, int *, double *);
|
||||
void unpack_border(int, int, double *);
|
||||
void unpack_border_vel(int, int, double *);
|
||||
int unpack_border_hybrid(int, int, double *);
|
||||
int pack_exchange(int, double *);
|
||||
int unpack_exchange(double *);
|
||||
int size_restart();
|
||||
int pack_restart(int, double *);
|
||||
int unpack_restart(double *);
|
||||
void create_atom(int, double *);
|
||||
// NOTE(review): the second parameter is declared tagint here, while the
// hybrid Kokkos header declares imageint for the same method -- confirm
// which one matches the base-class signature.
void data_atom(double *, tagint, char **);
|
||||
int data_atom_hybrid(int, char **);
|
||||
void pack_data(double **);
|
||||
int pack_data_hybrid(int, double *);
|
||||
void write_data(FILE *, int, double **);
|
||||
int write_data_hybrid(FILE *, double *);
|
||||
bigint memory_usage();
|
||||
|
||||
void grow_reset();
|
||||
// --- Kokkos interface: same communication steps as above, but the send
// lists and buffers are passed as Kokkos dual views (DAT::tdual_*) instead
// of raw pointers.
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
|
||||
const int & iswap,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
const int &pbc_flag, const int pbc[]);
|
||||
void unpack_comm_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf);
|
||||
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
|
||||
const int & iswap, const int nfirst,
|
||||
const int &pbc_flag, const int pbc[]);
|
||||
// The border/exchange variants additionally take an ExecutionSpace argument;
// presumably it selects host vs. device execution -- TODO confirm in the .cpp.
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
DAT::tdual_xfloat_2d buf,int iswap,
|
||||
int pbc_flag, int *pbc, ExecutionSpace space);
|
||||
void unpack_border_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
ExecutionSpace space);
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi);
|
||||
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space);
|
||||
|
||||
// Data-movement bookkeeping. Sibling atom styles call these with an
// execution space and an OR of per-field flags (e.g. X_MASK|TAG_MASK);
// presumably the same convention applies here -- confirm in the .cpp.
void sync(ExecutionSpace space, unsigned int mask);
|
||||
void modified(ExecutionSpace space, unsigned int mask);
|
||||
void sync_overlapping_device(ExecutionSpace space, unsigned int mask);
|
||||
// Public raw pointers to the DPD-specific per-atom arrays.
double *uCond,*uMech,*uChem,*uCG,*uCGnew,*rho,*dpdTheta;
|
||||
double *duChem;
|
||||
|
||||
protected:
|
||||
// Views for the DPD-specific arrays; the d_* (DAT) / h_* (HAT) pairs are
// presumably the device-side and host-side views of the same data --
// confirm in kokkos_type.h.
DAT::t_efloat_1d d_uCond, d_uMech, d_uChem, d_uCG, d_uCGnew,d_rho,d_dpdTheta,d_duChem;
|
||||
HAT::t_efloat_1d h_uCond, h_uMech, h_uChem, h_uCG, h_uCGnew,h_rho,h_dpdTheta,h_duChem;
|
||||
|
||||
// Raw pointers to the common per-atom fields.
tagint *tag;
|
||||
imageint *image;
|
||||
int *type,*mask;
|
||||
double **x,**v,**f;
|
||||
|
||||
// d_*/h_* views for the common per-atom fields (tag, image, type, mask).
DAT::t_tagint_1d d_tag;
|
||||
HAT::t_tagint_1d h_tag;
|
||||
DAT::t_imageint_1d d_image;
|
||||
HAT::t_imageint_1d h_image;
|
||||
DAT::t_int_1d d_type, d_mask;
|
||||
HAT::t_int_1d h_type, h_mask;
|
||||
|
||||
// d_*/h_* views for x, v and f.
DAT::t_x_array d_x;
|
||||
DAT::t_v_array d_v;
|
||||
DAT::t_f_array d_f;
|
||||
HAT::t_x_array h_x;
|
||||
HAT::t_v_array h_v;
|
||||
HAT::t_f_array h_f;
|
||||
|
||||
// Dual view of ints. Sibling atom styles store the running local atom count
// in element 0 during unpack_exchange_kokkos; presumably used the same way
// here -- TODO confirm.
DAT::tdual_int_1d k_count;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Per-processor system is too big
|
||||
|
||||
The number of owned atoms plus ghost atoms on a single
|
||||
processor must fit in a 32-bit integer.
|
||||
|
||||
E: Invalid atom type in Atoms section of data file
|
||||
|
||||
Atom types must range from 1 to specified # of types.
|
||||
|
||||
*/
|
|
@ -396,7 +396,6 @@ int AtomVecFullKokkos::pack_comm_kokkos(const int &n,
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
if(pbc_flag) {
|
||||
|
@ -424,7 +423,6 @@ int AtomVecFullKokkos::pack_comm_kokkos(const int &n,
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
|
||||
return n*size_forward;
|
||||
|
@ -515,7 +513,6 @@ int AtomVecFullKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &lis
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
|
@ -544,7 +541,6 @@ int AtomVecFullKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &lis
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
return n*3;
|
||||
}
|
||||
|
@ -582,13 +578,11 @@ void AtomVecFullKokkos::unpack_comm_kokkos(const int &n, const int &first,
|
|||
modified(Host,X_MASK);
|
||||
struct AtomVecFullKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
struct AtomVecFullKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -838,13 +832,11 @@ int AtomVecFullKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
|||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
AtomVecFullKokkos_PackBorder<LMPDeviceType,1> f(
|
||||
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
|
||||
iswap,d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
|
||||
} else {
|
||||
|
@ -854,13 +846,11 @@ int AtomVecFullKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
|||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
AtomVecFullKokkos_PackBorder<LMPDeviceType,0> f(
|
||||
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
|
||||
iswap,d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
}
|
||||
return n*size_border;
|
||||
|
@ -1071,12 +1061,10 @@ void AtomVecFullKokkos::unpack_border_kokkos(const int &n, const int &first,
|
|||
struct AtomVecFullKokkos_UnpackBorder<LMPHostType>
|
||||
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_q,h_molecule,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
struct AtomVecFullKokkos_UnpackBorder<LMPDeviceType>
|
||||
f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_q,d_molecule,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1422,13 +1410,11 @@ int AtomVecFullKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2
|
|||
AtomVecFullKokkos_PackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
LMPHostType::fence();
|
||||
return nsend*elements;
|
||||
} else {
|
||||
AtomVecFullKokkos_PackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
LMPDeviceType::fence();
|
||||
return nsend*elements;
|
||||
}
|
||||
}
|
||||
|
@ -1643,7 +1629,6 @@ int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr
|
|||
AtomVecFullKokkos_UnpackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/elements,f);
|
||||
LMPHostType::fence();
|
||||
return k_count.h_view(0);
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
|
@ -1652,7 +1637,6 @@ int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr
|
|||
AtomVecFullKokkos_UnpackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/elements,f);
|
||||
LMPDeviceType::fence();
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,161 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef ATOM_CLASS
|
||||
|
||||
AtomStyle(hybrid/kk,AtomVecHybridKokkos)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_ATOM_VEC_HYBRID_KOKKOS_H
|
||||
#define LMP_ATOM_VEC_HYBRID_KOKKOS_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include "atom_vec_kokkos.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// Kokkos variant of the hybrid atom style (registered above as hybrid/kk).
// Derives from AtomVecKokkos and holds an array of AtomVec sub-styles.
class AtomVecHybridKokkos : public AtomVecKokkos {
|
||||
public:
|
||||
// Sub-style bookkeeping: count, array of sub-style objects, and an array of
// strings (presumably the sub-style names -- confirm in the .cpp).
int nstyles;
|
||||
class AtomVec **styles;
|
||||
char **keywords;
|
||||
|
||||
AtomVecHybridKokkos(class LAMMPS *);
|
||||
~AtomVecHybridKokkos();
|
||||
// --- Raw-buffer interface: these routines take plain int*/double* buffers.
void process_args(int, char **);
|
||||
void init();
|
||||
void grow(int);
|
||||
void grow_reset();
|
||||
void copy(int, int, int);
|
||||
void clear_bonus();
|
||||
void force_clear(int, size_t);
|
||||
int pack_comm(int, int *, double *, int, int *);
|
||||
int pack_comm_vel(int, int *, double *, int, int *);
|
||||
void unpack_comm(int, int, double *);
|
||||
void unpack_comm_vel(int, int, double *);
|
||||
int pack_reverse(int, int, double *);
|
||||
void unpack_reverse(int, int *, double *);
|
||||
int pack_border(int, int *, double *, int, int *);
|
||||
int pack_border_vel(int, int *, double *, int, int *);
|
||||
void unpack_border(int, int, double *);
|
||||
void unpack_border_vel(int, int, double *);
|
||||
int pack_exchange(int, double *);
|
||||
int unpack_exchange(double *);
|
||||
int size_restart();
|
||||
int pack_restart(int, double *);
|
||||
int unpack_restart(double *);
|
||||
void create_atom(int, double *);
|
||||
void data_atom(double *, imageint, char **);
|
||||
// Defined inline: always returns 0.
int data_atom_hybrid(int, char **) {return 0;}
|
||||
void data_vel(int, char **);
|
||||
void pack_data(double **);
|
||||
void write_data(FILE *, int, double **);
|
||||
void pack_vel(double **);
|
||||
void write_vel(FILE *, int, double **);
|
||||
int property_atom(char *);
|
||||
void pack_property_atom(int, double *, int, int);
|
||||
bigint memory_usage();
|
||||
|
||||
// --- Kokkos interface: the send lists and buffers are passed as Kokkos dual
// views (DAT::tdual_*) instead of raw pointers.
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
|
||||
const int & iswap,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
const int &pbc_flag, const int pbc[]);
|
||||
void unpack_comm_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf);
|
||||
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
|
||||
const int & iswap, const int nfirst,
|
||||
const int &pbc_flag, const int pbc[]);
|
||||
// The border/exchange variants additionally take an ExecutionSpace argument;
// presumably it selects host vs. device execution -- TODO confirm in the .cpp.
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
DAT::tdual_xfloat_2d buf,int iswap,
|
||||
int pbc_flag, int *pbc, ExecutionSpace space);
|
||||
void unpack_border_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
ExecutionSpace space);
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi);
|
||||
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space);
|
||||
|
||||
// Data-movement bookkeeping. Sibling atom styles call these with an
// execution space and an OR of per-field flags (e.g. X_MASK|TAG_MASK);
// presumably the same convention applies here -- confirm in the .cpp.
void sync(ExecutionSpace space, unsigned int mask);
|
||||
void modified(ExecutionSpace space, unsigned int mask);
|
||||
void sync_overlapping_device(ExecutionSpace space, unsigned int mask);
|
||||
|
||||
private:
|
||||
// Raw pointers to the common per-atom fields, plus omega and angmom.
tagint *tag;
|
||||
int *type,*mask;
|
||||
imageint *image;
|
||||
double **x,**v,**f;
|
||||
double **omega,**angmom;
|
||||
|
||||
// d_* (DAT) / h_* (HAT) view pairs for the fields above; presumably the
// device-side and host-side views of the same data -- confirm in
// kokkos_type.h.
DAT::t_tagint_1d d_tag;
|
||||
DAT::t_int_1d d_type, d_mask;
|
||||
HAT::t_tagint_1d h_tag;
|
||||
HAT::t_int_1d h_type, h_mask;
|
||||
|
||||
DAT::t_imageint_1d d_image;
|
||||
HAT::t_imageint_1d h_image;
|
||||
|
||||
DAT::t_x_array d_x;
|
||||
DAT::t_v_array d_v;
|
||||
DAT::t_f_array d_f;
|
||||
HAT::t_x_array h_x;
|
||||
HAT::t_v_array h_v;
|
||||
HAT::t_f_array h_f;
|
||||
|
||||
// omega and angmom are declared with the same view type as velocities.
DAT::t_v_array d_omega, d_angmom;
|
||||
HAT::t_v_array h_omega, h_angmom;
|
||||
|
||||
// Dual view of ints. Sibling atom styles store the running local atom count
// in element 0 during unpack_exchange_kokkos; presumably used the same way
// here -- TODO confirm.
DAT::tdual_int_1d k_count;
|
||||
|
||||
// Count and string array filled by build_styles() and queried by
// known_style() -- NOTE(review): inferred from the names; verify in the .cpp.
int nallstyles;
|
||||
char **allstyles;
|
||||
|
||||
void build_styles();
|
||||
int known_style(char *);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Atom style hybrid cannot have hybrid as an argument
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Atom style hybrid cannot use same atom style twice
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Cannot mix molecular and molecule template atom styles
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Per-processor system is too big
|
||||
|
||||
The number of owned atoms plus ghost atoms on a single
|
||||
processor must fit in a 32-bit integer.
|
||||
|
||||
E: Invalid atom type in Atoms section of data file
|
||||
|
||||
Atom types must range from 1 to specified # of types.
|
||||
|
||||
*/
|
|
@ -387,7 +387,6 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
if(pbc_flag) {
|
||||
|
@ -415,7 +414,6 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
|
||||
return n*size_forward;
|
||||
|
@ -506,7 +504,6 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
|
@ -535,7 +532,6 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
return n*3;
|
||||
}
|
||||
|
@ -573,13 +569,11 @@ void AtomVecMolecularKokkos::unpack_comm_kokkos(const int &n, const int &first,
|
|||
modified(Host,X_MASK);
|
||||
struct AtomVecMolecularKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
struct AtomVecMolecularKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -825,13 +819,11 @@ int AtomVecMolecularKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendli
|
|||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
AtomVecMolecularKokkos_PackBorder<LMPDeviceType,1> f(
|
||||
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
|
||||
iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
|
||||
} else {
|
||||
|
@ -841,13 +833,11 @@ int AtomVecMolecularKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendli
|
|||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
AtomVecMolecularKokkos_PackBorder<LMPDeviceType,0> f(
|
||||
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
|
||||
iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
}
|
||||
return n*size_border;
|
||||
|
@ -1049,12 +1039,10 @@ void AtomVecMolecularKokkos::unpack_border_kokkos(const int &n, const int &first
|
|||
struct AtomVecMolecularKokkos_UnpackBorder<LMPHostType>
|
||||
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPHostType::fence();
|
||||
} else {
|
||||
struct AtomVecMolecularKokkos_UnpackBorder<LMPDeviceType>
|
||||
f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_molecule,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1389,13 +1377,11 @@ int AtomVecMolecularKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfl
|
|||
AtomVecMolecularKokkos_PackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
LMPHostType::fence();
|
||||
return nsend*elements;
|
||||
} else {
|
||||
AtomVecMolecularKokkos_PackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
LMPDeviceType::fence();
|
||||
return nsend*elements;
|
||||
}
|
||||
}
|
||||
|
@ -1608,7 +1594,6 @@ int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,i
|
|||
AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/elements,f);
|
||||
LMPHostType::fence();
|
||||
return k_count.h_view(0);
|
||||
} else {
|
||||
k_count.h_view(0) = nlocal;
|
||||
|
@ -1617,7 +1602,6 @@ int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,i
|
|||
AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPDeviceType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/elements,f);
|
||||
LMPDeviceType::fence();
|
||||
k_count.modify<LMPDeviceType>();
|
||||
k_count.sync<LMPHostType>();
|
||||
|
||||
|
|
|
@ -499,7 +499,6 @@ void CommKokkos::exchange_device()
|
|||
f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag,
|
||||
nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nlocal,f);
|
||||
DeviceType::fence();
|
||||
k_exchange_sendlist.modify<DeviceType>();
|
||||
k_sendflag.modify<DeviceType>();
|
||||
k_count.modify<DeviceType>();
|
||||
|
@ -535,7 +534,6 @@ void CommKokkos::exchange_device()
|
|||
k_exchange_sendlist,k_exchange_copylist,
|
||||
ExecutionSpaceFromDevice<DeviceType>::
|
||||
space,dim,lo,hi);
|
||||
DeviceType::fence();
|
||||
|
||||
} else {
|
||||
while (i < nlocal) {
|
||||
|
@ -560,7 +558,6 @@ void CommKokkos::exchange_device()
|
|||
atom->nlocal=avec->
|
||||
unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space);
|
||||
DeviceType::fence();
|
||||
}
|
||||
} else {
|
||||
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0,
|
||||
|
@ -593,7 +590,6 @@ void CommKokkos::exchange_device()
|
|||
atom->nlocal = avec->
|
||||
unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space);
|
||||
DeviceType::fence();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -765,7 +761,6 @@ void CommKokkos::borders_device() {
|
|||
total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
|
||||
Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128);
|
||||
Kokkos::parallel_for(config,f);
|
||||
DeviceType::fence();
|
||||
|
||||
total_send.template modify<DeviceType>();
|
||||
total_send.template sync<LMPHostType>();
|
||||
|
@ -782,7 +777,6 @@ void CommKokkos::borders_device() {
|
|||
total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
|
||||
Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128);
|
||||
Kokkos::parallel_for(config,f);
|
||||
DeviceType::fence();
|
||||
total_send.template modify<DeviceType>();
|
||||
total_send.template sync<LMPHostType>();
|
||||
}
|
||||
|
@ -911,7 +905,6 @@ void CommKokkos::borders_device() {
|
|||
|
||||
if (exec_space == Host) k_sendlist.sync<LMPDeviceType>();
|
||||
atomKK->modified(exec_space,ALL_MASK);
|
||||
DeviceType::fence();
|
||||
atomKK->sync(Host,TAG_MASK);
|
||||
if (map_style) atom->map_set();
|
||||
}
|
||||
|
|
|
@ -99,7 +99,6 @@ void DomainKokkos::reset_box()
|
|||
DomainResetBoxFunctor<LMPDeviceType>
|
||||
f(atomKK->k_x);
|
||||
Kokkos::parallel_reduce(nlocal,f,result);
|
||||
LMPDeviceType::fence();
|
||||
|
||||
double (*extent)[2] = result.value;
|
||||
double all[3][2];
|
||||
|
@ -384,7 +383,6 @@ void DomainKokkos::pbc()
|
|||
Kokkos::parallel_for(nlocal,f);
|
||||
}
|
||||
}
|
||||
LMPDeviceType::fence();
|
||||
|
||||
atomKK->modified(Device,X_MASK|V_MASK|IMAGE_MASK);
|
||||
}
|
||||
|
@ -424,7 +422,6 @@ void DomainKokkos::remap_all()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagDomain_remap_all>(0,nlocal),*this);
|
||||
LMPDeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
atomKK->modified(Device,X_MASK | IMAGE_MASK);
|
||||
|
@ -528,7 +525,6 @@ void DomainKokkos::image_flip(int m_in, int n_in, int p_in)
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagDomain_image_flip>(0,nlocal),*this);
|
||||
LMPDeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
atomKK->modified(Device,IMAGE_MASK);
|
||||
|
@ -561,7 +557,6 @@ void DomainKokkos::lamda2x(int n)
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagDomain_lamda2x>(0,n),*this);
|
||||
LMPDeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
atomKK->modified(Device,X_MASK);
|
||||
|
@ -587,7 +582,6 @@ void DomainKokkos::x2lamda(int n)
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagDomain_x2lamda>(0,n),*this);
|
||||
LMPDeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
atomKK->modified(Device,X_MASK);
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "fix_dpd_energy_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "force.h"
|
||||
#include "update.h"
|
||||
#include "respa.h"
|
||||
#include "modify.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace FixConst;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <typename DeviceType>
|
||||
FixDPDenergyKokkos<DeviceType>::FixDPDenergyKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
FixDPDenergy(lmp, narg, arg)
|
||||
{
|
||||
kokkosable = 1;
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
datamask_read = EMPTY_MASK;
|
||||
datamask_modify = EMPTY_MASK;
|
||||
pairDPDEKK = dynamic_cast<decltype(pairDPDEKK)>(pairDPDE);
|
||||
if (!pairDPDEKK)
|
||||
error->all(FLERR,"Must use pair_style dpd/fdt/energy/kk with fix dpd/energy/kk");
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <typename DeviceType>
|
||||
void FixDPDenergyKokkos<DeviceType>::take_half_step()
|
||||
{
|
||||
int nlocal = atom->nlocal;
|
||||
if (igroup == atom->firstgroup) nlocal = atom->nfirst;
|
||||
|
||||
using AT = ArrayTypes<DeviceType>;
|
||||
|
||||
atomKK->sync(execution_space, UCOND_MASK);
|
||||
typename AT::t_efloat_1d uCond = atomKK->k_uCond.view<DeviceType>();
|
||||
atomKK->sync(execution_space, UMECH_MASK);
|
||||
typename AT::t_efloat_1d uMech = atomKK->k_uMech.view<DeviceType>();
|
||||
|
||||
pairDPDEKK->k_duCond.template sync<DeviceType>();
|
||||
typename AT::t_efloat_1d_const duCond = pairDPDEKK->k_duCond.template view<DeviceType>();
|
||||
pairDPDEKK->k_duMech.template sync<DeviceType>();
|
||||
typename AT::t_efloat_1d_const duMech = pairDPDEKK->k_duMech.template view<DeviceType>();
|
||||
|
||||
auto dt = update->dt;
|
||||
|
||||
Kokkos::parallel_for(nlocal, LAMMPS_LAMBDA(int i) {
|
||||
uCond(i) += 0.5*dt*duCond(i);
|
||||
uMech(i) += 0.5*dt*duMech(i);
|
||||
});
|
||||
|
||||
atomKK->modified(execution_space, UCOND_MASK);
|
||||
atomKK->modified(execution_space, UMECH_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <typename DeviceType>
|
||||
void FixDPDenergyKokkos<DeviceType>::initial_integrate(int)
|
||||
{
|
||||
take_half_step();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <typename DeviceType>
|
||||
void FixDPDenergyKokkos<DeviceType>::final_integrate()
|
||||
{
|
||||
take_half_step();
|
||||
}
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class FixDPDenergyKokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class FixDPDenergyKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
// Style registrations: the plain /kk name and /kk/device both map to the
// LMPDeviceType instantiation; /kk/host maps to the LMPHostType one.
FixStyle(dpd/energy/kk,FixDPDenergyKokkos<LMPDeviceType>)
|
||||
FixStyle(dpd/energy/kk/device,FixDPDenergyKokkos<LMPDeviceType>)
|
||||
FixStyle(dpd/energy/kk/host,FixDPDenergyKokkos<LMPHostType>)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_DPDE_KOKKOS_H
|
||||
#define LMP_FIX_DPDE_KOKKOS_H
|
||||
|
||||
#include "fix_dpd_energy.h"
|
||||
#include "pair_dpd_fdt_energy_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template <typename DeviceType>
|
||||
class FixDPDenergyKokkos : public FixDPDenergy {
|
||||
public:
|
||||
FixDPDenergyKokkos(class LAMMPS *, int, char **);
|
||||
virtual ~FixDPDenergyKokkos() {}
|
||||
virtual void initial_integrate(int);
|
||||
virtual void final_integrate();
|
||||
|
||||
void take_half_step();
|
||||
protected:
|
||||
PairDPDfdtEnergyKokkos<DeviceType>* pairDPDEKK;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Must use pair_style dpd/fdt/energy/kk with fix dpd/energy/kk
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
*/
|
|
@ -0,0 +1,569 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Stan Moore (Sandia)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "fix_eos_table_rx_kokkos.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "error.h"
|
||||
#include "force.h"
|
||||
#include "memory.h"
|
||||
#include "comm.h"
|
||||
#include <math.h>
|
||||
#include "modify.h"
|
||||
#include "atom_masks.h"
|
||||
|
||||
// NOTE(review): MAXLINE is not referenced in the visible part of this file.
#define MAXLINE 1024
|
||||
|
||||
// MY_EPSILON is the threshold below which |f2-f1| is treated as zero in the
// secant solver of temperature_lookup().  It is ten times DBL_EPSILON when
// that macro is available, otherwise ten times the literal fallback below.
#ifdef DBL_EPSILON
|
||||
#define MY_EPSILON (10.0*DBL_EPSILON)
|
||||
#else
|
||||
#define MY_EPSILON (10.0*2.220446049250313e-16)
|
||||
#endif
|
||||
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace FixConst;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
FixEOStableRXKokkos<DeviceType>::FixEOStableRXKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
FixEOStableRX(lmp, narg, arg)
|
||||
{
|
||||
kokkosable = 1;
|
||||
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
datamask_read = EMPTY_MASK;
|
||||
datamask_modify = EMPTY_MASK;
|
||||
|
||||
update_table = 1;
|
||||
h_table = new TableHost();
|
||||
d_table = new TableDevice();
|
||||
|
||||
k_error_flag = DAT::tdual_int_scalar("fix:error_flag");
|
||||
k_warning_flag = DAT::tdual_int_scalar("fix:warning_flag");
|
||||
|
||||
k_dHf = DAT::tdual_float_1d("fix:dHf",nspecies);
|
||||
k_energyCorr = DAT::tdual_float_1d("fix:energyCorr",nspecies);
|
||||
k_tempCorrCoeff = DAT::tdual_float_1d("fix:tempCorrCoeff",nspecies);
|
||||
k_moleculeCorrCoeff = DAT::tdual_float_1d("fix:moleculeCorrCoeff",nspecies);
|
||||
for (int n = 0; n < nspecies; n++) {
|
||||
k_dHf.h_view(n) = dHf[n];
|
||||
k_energyCorr.h_view(n) = energyCorr[n];
|
||||
k_tempCorrCoeff.h_view(n) = tempCorrCoeff[n];
|
||||
k_moleculeCorrCoeff.h_view(n) = moleculeCorrCoeff[n];
|
||||
}
|
||||
|
||||
k_dHf.modify<LMPHostType>();
|
||||
k_dHf.sync<DeviceType>();
|
||||
d_dHf = k_dHf.view<DeviceType>();
|
||||
|
||||
k_energyCorr.modify<LMPHostType>();
|
||||
k_energyCorr.sync<DeviceType>();
|
||||
d_energyCorr = k_energyCorr.view<DeviceType>();
|
||||
|
||||
k_tempCorrCoeff.modify<LMPHostType>();
|
||||
k_tempCorrCoeff.sync<DeviceType>();
|
||||
d_tempCorrCoeff = k_tempCorrCoeff.view<DeviceType>();
|
||||
|
||||
k_moleculeCorrCoeff.modify<LMPHostType>();
|
||||
k_moleculeCorrCoeff.sync<DeviceType>();
|
||||
d_moleculeCorrCoeff = k_moleculeCorrCoeff.view<DeviceType>();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
FixEOStableRXKokkos<DeviceType>::~FixEOStableRXKokkos()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
delete h_table;
|
||||
delete d_table;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixEOStableRXKokkos<DeviceType>::setup(int vflag)
|
||||
{
|
||||
if (update_table)
|
||||
create_kokkos_tables();
|
||||
|
||||
copymode = 1;
|
||||
|
||||
int nlocal = atom->nlocal;
|
||||
boltz = force->boltz;
|
||||
mask = atomKK->k_mask.view<DeviceType>();
|
||||
uCond = atomKK->k_uCond.view<DeviceType>();
|
||||
uMech = atomKK->k_uMech.view<DeviceType>();
|
||||
uChem = atomKK->k_uChem.view<DeviceType>();
|
||||
dpdTheta= atomKK->k_dpdTheta.view<DeviceType>();
|
||||
uCG = atomKK->k_uCG.view<DeviceType>();
|
||||
uCGnew = atomKK->k_uCGnew.view<DeviceType>();
|
||||
dvector = atomKK->k_dvector.view<DeviceType>();
|
||||
|
||||
if (!this->restart_reset) {
|
||||
atomKK->sync(execution_space,MASK_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixEOStableRXSetup>(0,nlocal),*this);
|
||||
atomKK->modified(execution_space,UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
|
||||
}
|
||||
|
||||
// Communicate the updated momenta and velocities to all nodes
|
||||
atomKK->sync(Host,UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
|
||||
comm->forward_comm_fix(this);
|
||||
atomKK->modified(Host,UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
|
||||
|
||||
atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | DVECTOR_MASK);
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixEOStableRXTemperatureLookup>(0,nlocal),*this);
|
||||
atomKK->modified(execution_space,DPDTHETA_MASK);
|
||||
|
||||
error_check();
|
||||
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixEOStableRXKokkos<DeviceType>::operator()(TagFixEOStableRXSetup, const int &i) const {
|
||||
if (mask[i] & groupbit) {
|
||||
const double duChem = uCG[i] - uCGnew[i];
|
||||
uChem[i] += duChem;
|
||||
uCG[i] = 0.0;
|
||||
uCGnew[i] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixEOStableRXKokkos<DeviceType>::operator()(TagFixEOStableRXTemperatureLookup, const int &i) const {
|
||||
if (mask[i] & groupbit)
|
||||
temperature_lookup(i,uCond[i]+uMech[i]+uChem[i],dpdTheta[i]);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixEOStableRXKokkos<DeviceType>::init()
|
||||
{
|
||||
if (update_table)
|
||||
create_kokkos_tables();
|
||||
|
||||
copymode = 1;
|
||||
|
||||
int nlocal = atom->nlocal;
|
||||
boltz = force->boltz;
|
||||
mask = atomKK->k_mask.view<DeviceType>();
|
||||
uCond = atomKK->k_uCond.view<DeviceType>();
|
||||
uMech = atomKK->k_uMech.view<DeviceType>();
|
||||
uChem = atomKK->k_uChem.view<DeviceType>();
|
||||
dpdTheta= atomKK->k_dpdTheta.view<DeviceType>();
|
||||
dvector = atomKK->k_dvector.view<DeviceType>();
|
||||
|
||||
if (this->restart_reset) {
|
||||
atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | DVECTOR_MASK);
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixEOStableRXTemperatureLookup>(0,nlocal),*this);
|
||||
atomKK->modified(execution_space,DPDTHETA_MASK);
|
||||
} else {
|
||||
atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | DVECTOR_MASK);
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixEOStableRXInit>(0,nlocal),*this);
|
||||
atomKK->modified(execution_space,UCOND_MASK | UMECH_MASK | UCHEM_MASK);
|
||||
}
|
||||
|
||||
error_check();
|
||||
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixEOStableRXKokkos<DeviceType>::operator()(TagFixEOStableRXInit, const int &i) const {
|
||||
double tmp;
|
||||
if (mask[i] & groupbit) {
|
||||
if(dpdTheta[i] <= 0.0)
|
||||
k_error_flag.template view<DeviceType>()() = 1;
|
||||
energy_lookup(i,dpdTheta[i],tmp);
|
||||
uCond[i] = 0.0;
|
||||
uMech[i] = tmp;
|
||||
uChem[i] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixEOStableRXKokkos<DeviceType>::post_integrate()
|
||||
{
|
||||
if (update_table)
|
||||
create_kokkos_tables();
|
||||
|
||||
copymode = 1;
|
||||
|
||||
int nlocal = atom->nlocal;
|
||||
boltz = force->boltz;
|
||||
mask = atomKK->k_mask.view<DeviceType>();
|
||||
uCond = atomKK->k_uCond.view<DeviceType>();
|
||||
uMech = atomKK->k_uMech.view<DeviceType>();
|
||||
uChem = atomKK->k_uChem.view<DeviceType>();
|
||||
dpdTheta= atomKK->k_dpdTheta.view<DeviceType>();
|
||||
dvector = atomKK->k_dvector.view<DeviceType>();
|
||||
|
||||
atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | DVECTOR_MASK);
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixEOStableRXTemperatureLookup2>(0,nlocal),*this);
|
||||
atomKK->modified(execution_space,DPDTHETA_MASK);
|
||||
|
||||
error_check();
|
||||
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixEOStableRXKokkos<DeviceType>::operator()(TagFixEOStableRXTemperatureLookup2, const int &i) const {
|
||||
if (mask[i] & groupbit){
|
||||
temperature_lookup(i,uCond[i]+uMech[i]+uChem[i],dpdTheta[i]);
|
||||
if (dpdTheta[i] <= 0.0)
|
||||
k_error_flag.template view<DeviceType>()() = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixEOStableRXKokkos<DeviceType>::end_of_step()
|
||||
{
|
||||
if (update_table)
|
||||
create_kokkos_tables();
|
||||
|
||||
copymode = 1;
|
||||
|
||||
int nlocal = atom->nlocal;
|
||||
boltz = force->boltz;
|
||||
mask = atomKK->k_mask.view<DeviceType>();
|
||||
uCond = atomKK->k_uCond.view<DeviceType>();
|
||||
uMech = atomKK->k_uMech.view<DeviceType>();
|
||||
uChem = atomKK->k_uChem.view<DeviceType>();
|
||||
dpdTheta= atomKK->k_dpdTheta.view<DeviceType>();
|
||||
uCG = atomKK->k_uCG.view<DeviceType>();
|
||||
uCGnew = atomKK->k_uCGnew.view<DeviceType>();
|
||||
dvector = atomKK->k_dvector.view<DeviceType>();
|
||||
|
||||
|
||||
// Communicate the ghost uCGnew
|
||||
atomKK->sync(Host,UCG_MASK | UCGNEW_MASK);
|
||||
comm->reverse_comm_fix(this);
|
||||
atomKK->modified(Host,UCG_MASK | UCGNEW_MASK);
|
||||
|
||||
atomKK->sync(execution_space,MASK_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixEOStableRXSetup>(0,nlocal),*this);
|
||||
atomKK->modified(execution_space,UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
|
||||
|
||||
// Communicate the updated momenta and velocities to all nodes
|
||||
atomKK->sync(Host,UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
|
||||
comm->forward_comm_fix(this);
|
||||
atomKK->modified(Host,UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
|
||||
|
||||
atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | DVECTOR_MASK);
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixEOStableRXTemperatureLookup2>(0,nlocal),*this);
|
||||
atomKK->modified(execution_space,DPDTHETA_MASK);
|
||||
|
||||
error_check();
|
||||
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
calculate potential ui at temperature thetai
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixEOStableRXKokkos<DeviceType>::energy_lookup(int id, double thetai, double &ui) const
|
||||
{
|
||||
int itable, nPG;
|
||||
double fraction, uTmp, nMolecules, nTotal, nTotalPG;
|
||||
double tolerance = 1.0e-10;
|
||||
|
||||
ui = 0.0;
|
||||
nTotal = 0.0;
|
||||
nTotalPG = 0.0;
|
||||
nPG = 0;
|
||||
|
||||
if (rx_flag) {
|
||||
for (int ispecies = 0; ispecies < nspecies; ispecies++ ) {
|
||||
nTotal += dvector(ispecies,id);
|
||||
if (fabs(d_moleculeCorrCoeff[ispecies]) > tolerance) {
|
||||
nPG++;
|
||||
nTotalPG += dvector(ispecies,id);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
nTotal = 1.0;
|
||||
}
|
||||
|
||||
for(int ispecies=0;ispecies<nspecies;ispecies++){
|
||||
//Table *tb = &tables[ispecies];
|
||||
//thetai = MAX(thetai,tb->lo);
|
||||
thetai = MAX(thetai,d_table_const.lo(ispecies));
|
||||
//thetai = MIN(thetai,tb->hi);
|
||||
thetai = MIN(thetai,d_table_const.hi(ispecies));
|
||||
|
||||
if (tabstyle == LINEAR) {
|
||||
//itable = static_cast<int> ((thetai - tb->lo) * tb->invdelta);
|
||||
itable = static_cast<int> ((thetai - d_table_const.lo(ispecies)) * d_table_const.invdelta(ispecies));
|
||||
//fraction = (thetai - tb->r[itable]) * tb->invdelta;
|
||||
fraction = (thetai - d_table_const.r(ispecies,itable)) * d_table_const.invdelta(ispecies);
|
||||
//uTmp = tb->e[itable] + fraction*tb->de[itable];
|
||||
uTmp = d_table_const.e(ispecies,itable) + fraction*d_table_const.de(ispecies,itable);
|
||||
|
||||
uTmp += d_dHf[ispecies];
|
||||
uTmp += d_tempCorrCoeff[ispecies]*thetai; // temperature correction
|
||||
uTmp += d_energyCorr[ispecies]; // energy correction
|
||||
if (nPG > 0) ui += d_moleculeCorrCoeff[ispecies]*nTotalPG/double(nPG); // molecule correction
|
||||
|
||||
if (rx_flag) nMolecules = dvector(ispecies,id);
|
||||
else nMolecules = 1.0;
|
||||
ui += nMolecules*uTmp;
|
||||
}
|
||||
}
|
||||
ui = ui - double(nTotal+1.5)*boltz*thetai;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
calculate temperature thetai at energy ui
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixEOStableRXKokkos<DeviceType>::temperature_lookup(int id, double ui, double &thetai) const
|
||||
{
|
||||
//Table *tb = &tables[0];
|
||||
|
||||
int it;
|
||||
double t1,t2,u1,u2,f1,f2;
|
||||
double maxit = 100;
|
||||
double temp;
|
||||
double delta = 0.001;
|
||||
double tolerance = 1.0e-10;
|
||||
int lo = d_table_const.lo(0);
|
||||
int hi = d_table_const.hi(0);
|
||||
|
||||
// Store the current thetai in t1
|
||||
t1 = MAX(thetai,lo);
|
||||
t1 = MIN(t1,hi);
|
||||
if(t1==hi) delta = -delta;
|
||||
|
||||
// Compute u1 at thetai
|
||||
energy_lookup(id,t1,u1);
|
||||
|
||||
// Compute f1
|
||||
f1 = u1 - ui;
|
||||
|
||||
// Compute guess of t2
|
||||
t2 = (1.0 + delta)*t1;
|
||||
|
||||
// Compute u2 at t2
|
||||
energy_lookup(id,t2,u2);
|
||||
|
||||
// Compute f1
|
||||
f2 = u2 - ui;
|
||||
|
||||
// Apply the Secant Method
|
||||
for(it=0; it<maxit; it++){
|
||||
if(fabs(f2-f1) < MY_EPSILON){
|
||||
if(isnan(f1) || isnan(f2)) k_error_flag.template view<DeviceType>()() = 2;
|
||||
temp = t1;
|
||||
temp = MAX(temp,lo);
|
||||
temp = MIN(temp,hi);
|
||||
k_warning_flag.template view<DeviceType>()() = 1;
|
||||
break;
|
||||
}
|
||||
temp = t2 - f2*(t2-t1)/(f2-f1);
|
||||
if(fabs(temp-t2) < tolerance) break;
|
||||
f1 = f2;
|
||||
t1 = t2;
|
||||
t2 = temp;
|
||||
energy_lookup(id,t2,u2);
|
||||
f2 = u2 - ui;
|
||||
}
|
||||
if(it==maxit){
|
||||
if(isnan(f1) || isnan(f2) || isnan(ui) || isnan(thetai) || isnan(t1) || isnan(t2))
|
||||
k_error_flag.template view<DeviceType>()() = 2;
|
||||
else
|
||||
k_error_flag.template view<DeviceType>()() = 3;
|
||||
}
|
||||
thetai = temp;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int FixEOStableRXKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc)
|
||||
{
|
||||
int ii,jj,m;
|
||||
HAT::t_efloat_1d h_uChem = atomKK->k_uChem.h_view;
|
||||
HAT::t_efloat_1d h_uCG = atomKK->k_uCG.h_view;
|
||||
HAT::t_efloat_1d h_uCGnew = atomKK->k_uCGnew.h_view;
|
||||
|
||||
m = 0;
|
||||
for (ii = 0; ii < n; ii++) {
|
||||
jj = list[ii];
|
||||
buf[m++] = h_uChem[jj];
|
||||
buf[m++] = h_uCG[jj];
|
||||
buf[m++] = h_uCGnew[jj];
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixEOStableRXKokkos<DeviceType>::unpack_forward_comm(int n, int first, double *buf)
|
||||
{
|
||||
int ii,m,last;
|
||||
HAT::t_efloat_1d h_uChem = atomKK->k_uChem.h_view;
|
||||
HAT::t_efloat_1d h_uCG = atomKK->k_uCG.h_view;
|
||||
HAT::t_efloat_1d h_uCGnew = atomKK->k_uCGnew.h_view;
|
||||
|
||||
m = 0;
|
||||
last = first + n ;
|
||||
for (ii = first; ii < last; ii++){
|
||||
h_uChem[ii] = buf[m++];
|
||||
h_uCG[ii] = buf[m++];
|
||||
h_uCGnew[ii] = buf[m++];
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int FixEOStableRXKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *buf)
|
||||
{
|
||||
int i,m,last;
|
||||
HAT::t_efloat_1d h_uCG = atomKK->k_uCG.h_view;
|
||||
HAT::t_efloat_1d h_uCGnew = atomKK->k_uCGnew.h_view;
|
||||
|
||||
m = 0;
|
||||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
buf[m++] = h_uCG[i];
|
||||
buf[m++] = h_uCGnew[i];
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixEOStableRXKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double *buf)
|
||||
{
|
||||
int i,j,m;
|
||||
HAT::t_efloat_1d h_uCG = atomKK->k_uCG.h_view;
|
||||
HAT::t_efloat_1d h_uCGnew = atomKK->k_uCGnew.h_view;
|
||||
|
||||
m = 0;
|
||||
for (i = 0; i < n; i++) {
|
||||
j = list[i];
|
||||
|
||||
h_uCG[j] += buf[m++];
|
||||
h_uCGnew[j] += buf[m++];
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixEOStableRXKokkos<DeviceType>::error_check()
|
||||
{
|
||||
k_error_flag.template modify<DeviceType>();
|
||||
k_error_flag.template sync<LMPHostType>();
|
||||
if (k_error_flag.h_view() == 1)
|
||||
error->one(FLERR,"Internal temperature <= zero");
|
||||
else if (k_error_flag.h_view() == 2)
|
||||
error->one(FLERR,"NaN detected in secant solver.");
|
||||
else if (k_error_flag.h_view() == 3)
|
||||
error->one(FLERR,"Maxit exceeded in secant solver.");
|
||||
|
||||
k_warning_flag.template modify<DeviceType>();
|
||||
k_warning_flag.template sync<LMPHostType>();
|
||||
if (k_warning_flag.h_view()) {
|
||||
error->warning(FLERR,"Secant solver did not converge because table bounds were exceeded.");
|
||||
k_warning_flag.h_view() = 0;
|
||||
k_warning_flag.template modify<LMPHostType>();
|
||||
k_warning_flag.template sync<DeviceType>();
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixEOStableRXKokkos<DeviceType>::create_kokkos_tables()
|
||||
{
|
||||
const int tlm1 = tablength-1;
|
||||
|
||||
memory->create_kokkos(d_table->lo,h_table->lo,ntables,"Table::lo");
|
||||
memory->create_kokkos(d_table->hi,h_table->hi,ntables,"Table::hi");
|
||||
memory->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta");
|
||||
|
||||
if(tabstyle == LINEAR) {
|
||||
memory->create_kokkos(d_table->r,h_table->r,ntables,tablength,"Table::r");
|
||||
memory->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e");
|
||||
memory->create_kokkos(d_table->de,h_table->de,ntables,tlm1,"Table::de");
|
||||
}
|
||||
|
||||
for(int i=0; i < ntables; i++) {
|
||||
Table* tb = &tables[i];
|
||||
|
||||
h_table->lo[i] = tb->lo;
|
||||
h_table->hi[i] = tb->hi;
|
||||
h_table->invdelta[i] = tb->invdelta;
|
||||
|
||||
for(int j = 0; j<h_table->r.dimension_1(); j++)
|
||||
h_table->r(i,j) = tb->r[j];
|
||||
for(int j = 0; j<h_table->e.dimension_1(); j++)
|
||||
h_table->e(i,j) = tb->e[j];
|
||||
for(int j = 0; j<h_table->de.dimension_1(); j++)
|
||||
h_table->de(i,j) = tb->de[j];
|
||||
}
|
||||
|
||||
Kokkos::deep_copy(d_table->lo,h_table->lo);
|
||||
Kokkos::deep_copy(d_table->hi,h_table->hi);
|
||||
Kokkos::deep_copy(d_table->invdelta,h_table->invdelta);
|
||||
Kokkos::deep_copy(d_table->r,h_table->r);
|
||||
Kokkos::deep_copy(d_table->e,h_table->e);
|
||||
Kokkos::deep_copy(d_table->de,h_table->de);
|
||||
|
||||
d_table_const.lo = d_table->lo;
|
||||
d_table_const.hi = d_table->hi;
|
||||
d_table_const.invdelta = d_table->invdelta;
|
||||
d_table_const.r = d_table->r;
|
||||
d_table_const.e = d_table->e;
|
||||
d_table_const.de = d_table->de;
|
||||
|
||||
update_table = 0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class FixEOStableRXKokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class FixEOStableRXKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
|
@ -0,0 +1,212 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(eos/table/rx/kk,FixEOStableRXKokkos<LMPDeviceType>)
|
||||
FixStyle(eos/table/rx/kk/device,FixEOStableRXKokkos<LMPDeviceType>)
|
||||
FixStyle(eos/table/rx/kk/host,FixEOStableRXKokkos<LMPHostType>)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_EOS_TABLE_RX_KOKKOS_H
|
||||
#define LMP_FIX_EOS_TABLE_RX_KOKKOS_H
|
||||
|
||||
#include "fix_eos_table_rx.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
struct TagFixEOStableRXInit{};
|
||||
struct TagFixEOStableRXSetup{};
|
||||
struct TagFixEOStableRXTemperatureLookup{};
|
||||
struct TagFixEOStableRXTemperatureLookup2{};
|
||||
|
||||
template<class DeviceType>
|
||||
class FixEOStableRXKokkos : public FixEOStableRX {
|
||||
public:
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
typedef EV_FLOAT value_type;
|
||||
|
||||
FixEOStableRXKokkos(class LAMMPS *, int, char **);
|
||||
virtual ~FixEOStableRXKokkos();
|
||||
void setup(int);
|
||||
void init();
|
||||
void post_integrate();
|
||||
void end_of_step();
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagFixEOStableRXInit, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagFixEOStableRXSetup, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagFixEOStableRXTemperatureLookup, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagFixEOStableRXTemperatureLookup2, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void energy_lookup(int, double, double &) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void temperature_lookup(int, double, double &) const;
|
||||
|
||||
protected:
|
||||
//struct Table {
|
||||
// int ninput;
|
||||
// double lo,hi;
|
||||
// double *rfile,*efile;
|
||||
// double *e2file;
|
||||
// double delta,invdelta,deltasq6;
|
||||
// double *r,*e,*de,*e2;
|
||||
//};
|
||||
//Table *tables, *tables2;
|
||||
|
||||
/*struct TableDeviceConst {
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_randomread lo,hi;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_1d_randomread invdelta;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d_randomread r,e,de;
|
||||
};*/
|
||||
//Its faster not to use texture fetch if the number of tables is less than 32!
|
||||
struct TableDeviceConst {
|
||||
typename ArrayTypes<DeviceType>::t_int_1d lo,hi;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_1d invdelta;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d_randomread r,e,de;
|
||||
};
|
||||
|
||||
struct TableDevice {
|
||||
typename ArrayTypes<DeviceType>::t_int_1d lo,hi;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_1d invdelta;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d r,e,de;
|
||||
};
|
||||
|
||||
struct TableHost {
|
||||
typename ArrayTypes<LMPHostType>::t_int_1d lo,hi;
|
||||
typename ArrayTypes<LMPHostType>::t_ffloat_1d invdelta;
|
||||
typename ArrayTypes<LMPHostType>::t_ffloat_2d r,e,de;
|
||||
};
|
||||
|
||||
TableDeviceConst d_table_const;
|
||||
TableDevice* d_table;
|
||||
TableHost* h_table;
|
||||
|
||||
int **tabindex;
|
||||
|
||||
double boltz;
|
||||
|
||||
void allocate();
|
||||
void error_check();
|
||||
int update_table;
|
||||
void create_kokkos_tables();
|
||||
|
||||
DAT::tdual_float_1d k_dHf,k_energyCorr,k_tempCorrCoeff,k_moleculeCorrCoeff;
|
||||
typename AT::t_float_1d d_dHf,d_energyCorr,d_tempCorrCoeff,d_moleculeCorrCoeff;
|
||||
|
||||
typename AT::t_int_1d mask;
|
||||
typename AT::t_efloat_1d uCond,uMech,uChem,uCG,uCGnew,rho,dpdTheta,duChem;
|
||||
typename AT::t_float_2d dvector;
|
||||
|
||||
DAT::tdual_int_scalar k_error_flag;
|
||||
DAT::tdual_int_scalar k_warning_flag;
|
||||
|
||||
int pack_reverse_comm(int, int, double *);
|
||||
void unpack_reverse_comm(int, int *, double *);
|
||||
int pack_forward_comm(int , int *, double *, int, int *);
|
||||
void unpack_forward_comm(int , int , double *);
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Illegal ... command
|
||||
|
||||
Self-explanatory. Check the input script syntax and compare to the
|
||||
documentation for the command. You can use -echo screen as a
|
||||
command-line option when running LAMMPS to see the offending line.
|
||||
|
||||
E: FixEOStableRXKokkos requires a fix rx command.
|
||||
|
||||
The fix rx command must come before the pair style command in the input file
|
||||
|
||||
E: There are no rx species specified
|
||||
|
||||
There must be at least one species specified through the fix rx command
|
||||
|
||||
E: Invalid eos/table/rx length
|
||||
|
||||
The eos/table/rx table must have more than one entry.
|
||||
|
||||
E: eos/table/rx values are not increasing
|
||||
|
||||
The equation-of-state must be an increasing function.
|
||||
|
||||
E: FixEOStableRX requires atom_style with internal temperature and energies (e.g. dpd)
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Internal temperature <= zero.
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Cannot open eos table/rx potential file %s
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Incorrect format in eos table/rx file
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Cannot open file %s
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Did not find keyword in table file
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Illegal fix eos/table/rx command
|
||||
|
||||
Incorrect number of arguments specified for the fix eos/table/rx command.
|
||||
|
||||
E: Invalid keyword in fix eos/table/rx parameters
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: The number of columns in fix eos/table/rx does not match the number of species.
|
||||
|
||||
Self-explanatory. Check format for fix eos/table/rx file.
|
||||
|
||||
E: fix eos/table/rx parameters did not set N
|
||||
|
||||
The number of table entries was not set in the eos/table/rx file
|
||||
|
||||
W: Secant solver did not converge because table bounds were exceeded
|
||||
|
||||
The secant solver failed to converge, so the lower or upper table bound temperature is returned.
|
||||
|
||||
E: NaN detected in secant solver.
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Maxit exceeded in secant solver
|
||||
|
||||
The maximum number of iterations was exceeded in the secant solver.
|
||||
|
||||
*/
|
|
@ -0,0 +1,72 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "fix_property_atom_kokkos.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "comm.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "update.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace FixConst;
|
||||
|
||||
enum{MOLECULE,CHARGE,RMASS,INTEGER,DOUBLE};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Forwards all arguments to FixPropertyAtom, then caches the global atom
// object as an AtomKokkos pointer for use by grow_arrays().
FixPropertyAtomKokkos::FixPropertyAtomKokkos(LAMMPS *lmp, int narg, char **arg)
  : FixPropertyAtom(lmp, narg, arg)
{
  atomKK = (AtomKokkos *) atom;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
allocate atom-based arrays
|
||||
initialize new values to 0,
|
||||
since AtomVec class won't do it as atoms are added,
|
||||
e.g. in create_atom() or data_atom()
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void FixPropertyAtomKokkos::grow_arrays(int nmax)
|
||||
{
|
||||
for (int m = 0; m < nvalue; m++) {
|
||||
if (style[m] == MOLECULE) {
|
||||
memory->grow(atom->molecule,nmax,"atom:molecule");
|
||||
size_t nbytes = (nmax-nmax_old) * sizeof(tagint);
|
||||
memset(&atom->molecule[nmax_old],0,nbytes);
|
||||
} else if (style[m] == CHARGE) {
|
||||
memory->grow(atom->q,nmax,"atom:q");
|
||||
size_t nbytes = (nmax-nmax_old) * sizeof(double);
|
||||
memset(&atom->q[nmax_old],0,nbytes);
|
||||
} else if (style[m] == RMASS) {
|
||||
memory->grow(atom->rmass,nmax,"atom:rmass");
|
||||
size_t nbytes = (nmax-nmax_old) * sizeof(double);
|
||||
memset(&atom->rmass[nmax_old],0,nbytes);
|
||||
} else if (style[m] == INTEGER) {
|
||||
memory->grow(atom->ivector[index[m]],nmax,"atom:ivector");
|
||||
size_t nbytes = (nmax-nmax_old) * sizeof(int);
|
||||
memset(&atom->ivector[index[m]][nmax_old],0,nbytes);
|
||||
} else if (style[m] == DOUBLE) {
|
||||
memory->grow_kokkos(atomKK->k_dvector,atomKK->dvector,atomKK->k_dvector.dimension_0(),nmax,
|
||||
"atom:dvector");
|
||||
//memory->grow(atom->dvector[index[m]],nmax,"atom:dvector");
|
||||
//size_t nbytes = (nmax-nmax_old) * sizeof(double);
|
||||
//memset(&atom->dvector[index[m]][nmax_old],0,nbytes);
|
||||
}
|
||||
}
|
||||
|
||||
nmax_old = nmax;
|
||||
}
|
|
@ -0,0 +1,90 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(property/atom/kk,FixPropertyAtomKokkos)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_PROPERTY_ATOM_KOKKOS_H
|
||||
#define LMP_FIX_PROPERTY_ATOM_KOKKOS_H
|
||||
|
||||
#include "fix_property_atom.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// Kokkos-enabled variant of fix property/atom. It reuses FixPropertyAtom
// and re-declares only grow_arrays().
class FixPropertyAtomKokkos : public FixPropertyAtom {
 public:
  FixPropertyAtomKokkos(class LAMMPS *, int, char **);
  virtual ~FixPropertyAtomKokkos() {}

  // Resize the per-atom arrays owned by this fix.
  void grow_arrays(int);
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Illegal ... command
|
||||
|
||||
Self-explanatory. Check the input script syntax and compare to the
|
||||
documentation for the command. You can use -echo screen as a
|
||||
command-line option when running LAMMPS to see the offending line.
|
||||
|
||||
E: Fix property/atom mol when atom_style already has molecule attribute
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Fix property/atom cannot specify mol twice
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Fix property/atom q when atom_style already has charge attribute
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Fix property/atom cannot specify q twice
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Fix property/atom vector name already exists
|
||||
|
||||
The name for an integer or floating-point vector must be unique.
|
||||
|
||||
W: Fix property/atom mol or charge w/out ghost communication
|
||||
|
||||
A model typically needs these properties defined for ghost atoms.
|
||||
|
||||
E: Atom style was redefined after using fix property/atom
|
||||
|
||||
This is not allowed.
|
||||
|
||||
E: Incorrect %s format in data file
|
||||
|
||||
A section of the data file being read by fix property/atom does
|
||||
not have the correct number of values per line.
|
||||
|
||||
E: Too few lines in %s section of data file
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Invalid atom ID in %s section of data file
|
||||
|
||||
An atom in a section of the data file being read by fix property/atom
|
||||
has an invalid atom ID that is <= 0 or > the maximum existing atom ID.
|
||||
|
||||
*/
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,282 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(rx/kk,FixRxKokkos<LMPDeviceType>)
|
||||
FixStyle(rx/kk/device,FixRxKokkos<LMPDeviceType>)
|
||||
FixStyle(rx/kk/host,FixRxKokkos<LMPHostType>)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_RX_KOKKOS_H
|
||||
#define LMP_FIX_RX_KOKKOS_H
|
||||
|
||||
#include "fix_rx.h"
|
||||
#include "pair_dpd_fdt_energy_kokkos.h"
|
||||
#include "kokkos_type.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
struct Tag_FixRxKokkos_zeroTemperatureViews {};
|
||||
struct Tag_FixRxKokkos_zeroCounterViews {};
|
||||
|
||||
template <int WT_FLAG, bool NEWTON_PAIR, int NEIGHFLAG>
|
||||
struct Tag_FixRxKokkos_firstPairOperator {};
|
||||
|
||||
template <int WT_FLAG, int LOCAL_TEMP_FLAG>
|
||||
struct Tag_FixRxKokkos_2ndPairOperator {};
|
||||
|
||||
template <bool ZERO_RATES>
|
||||
struct Tag_FixRxKokkos_solveSystems {};
|
||||
|
||||
struct s_CounterType
|
||||
{
|
||||
int nSteps, nIters, nFuncs, nFails;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
s_CounterType() : nSteps(0), nIters(0), nFuncs(0), nFails(0) {};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
s_CounterType& operator+=(const s_CounterType &rhs)
|
||||
{
|
||||
nSteps += rhs.nSteps;
|
||||
nIters += rhs.nIters;
|
||||
nFuncs += rhs.nFuncs;
|
||||
nFails += rhs.nFails;
|
||||
return *this;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
volatile s_CounterType& operator+=(const volatile s_CounterType &rhs) volatile
|
||||
{
|
||||
nSteps += rhs.nSteps;
|
||||
nIters += rhs.nIters;
|
||||
nFuncs += rhs.nFuncs;
|
||||
nFails += rhs.nFails;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
typedef struct s_CounterType CounterType;
|
||||
|
||||
template <typename DeviceType>
|
||||
class FixRxKokkos : public FixRX {
|
||||
public:
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
FixRxKokkos(class LAMMPS *, int, char **);
|
||||
virtual ~FixRxKokkos();
|
||||
virtual void init();
|
||||
void init_list(int, class NeighList *);
|
||||
void post_constructor();
|
||||
virtual void setup_pre_force(int);
|
||||
virtual void pre_force(int);
|
||||
|
||||
// Define a value_type here for the reduction operator on CounterType.
|
||||
typedef CounterType value_type;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(Tag_FixRxKokkos_zeroCounterViews, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(Tag_FixRxKokkos_zeroTemperatureViews, const int&) const;
|
||||
|
||||
template <int WT_FLAG, bool NEWTON_PAIR, int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(Tag_FixRxKokkos_firstPairOperator<WT_FLAG,NEWTON_PAIR,NEIGHFLAG>, const int&) const;
|
||||
|
||||
template <int WT_FLAG, int LOCAL_TEMP_FLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(Tag_FixRxKokkos_2ndPairOperator<WT_FLAG,LOCAL_TEMP_FLAG>, const int&) const;
|
||||
|
||||
template <bool ZERO_RATES>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(Tag_FixRxKokkos_solveSystems<ZERO_RATES>, const int&, CounterType&) const;
|
||||
|
||||
//protected:
|
||||
PairDPDfdtEnergyKokkos<DeviceType>* pairDPDEKK;
|
||||
double VDPD;
|
||||
|
||||
double boltz;
|
||||
double t_stop;
|
||||
|
||||
template <typename T, int stride = 1>
|
||||
struct StridedArrayType
|
||||
{
|
||||
typedef T value_type;
|
||||
enum { Stride = stride };
|
||||
|
||||
value_type *m_data;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
StridedArrayType() : m_data(NULL) {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
StridedArrayType(value_type *ptr) : m_data(ptr) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION value_type& operator()(const int idx) { return m_data[Stride*idx]; }
|
||||
KOKKOS_INLINE_FUNCTION const value_type& operator()(const int idx) const { return m_data[Stride*idx]; }
|
||||
KOKKOS_INLINE_FUNCTION value_type& operator[](const int idx) { return m_data[Stride*idx]; }
|
||||
KOKKOS_INLINE_FUNCTION const value_type& operator[](const int idx) const { return m_data[Stride*idx]; }
|
||||
};
|
||||
|
||||
template <int stride = 1>
|
||||
struct UserRHSDataKokkos
|
||||
{
|
||||
StridedArrayType<double,1> kFor;
|
||||
StridedArrayType<double,1> rxnRateLaw;
|
||||
};
|
||||
|
||||
void solve_reactions(const int vflag, const bool isPreForce);
|
||||
|
||||
int rhs (double, const double *, double *, void *) const;
|
||||
int rhs_dense (double, const double *, double *, void *) const;
|
||||
int rhs_sparse(double, const double *, double *, void *) const;
|
||||
|
||||
template <typename VectorType, typename UserDataType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int k_rhs (double, const VectorType&, VectorType&, UserDataType& ) const;
|
||||
|
||||
template <typename VectorType, typename UserDataType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int k_rhs_dense (double, const VectorType&, VectorType&, UserDataType& ) const;
|
||||
|
||||
template <typename VectorType, typename UserDataType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int k_rhs_sparse(double, const VectorType&, VectorType&, UserDataType& ) const;
|
||||
|
||||
//!< Classic Runge-Kutta 4th-order stepper.
|
||||
void rk4(const double t_stop, double *y, double *rwork, void *v_params) const;
|
||||
|
||||
//!< Runge-Kutta-Fehlberg ODE Solver.
|
||||
void rkf45(const int neq, const double t_stop, double *y, double *rwork, void *v_params, CounterType& counter) const;
|
||||
|
||||
//!< Runge-Kutta-Fehlberg ODE stepper function.
|
||||
void rkf45_step (const int neq, const double h, double y[], double y_out[],
|
||||
double rwk[], void *) const;
|
||||
|
||||
//!< Initial step size estimation for the Runge-Kutta-Fehlberg ODE solver.
|
||||
int rkf45_h0 (const int neq, const double t, const double t_stop,
|
||||
const double hmin, const double hmax,
|
||||
double& h0, double y[], double rwk[], void *v_params) const;
|
||||
|
||||
//!< Classic Runge-Kutta 4th-order stepper.
|
||||
template <typename VectorType, typename UserDataType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void k_rk4(const double t_stop, VectorType& y, VectorType& rwork, UserDataType& userData) const;
|
||||
|
||||
//!< Runge-Kutta-Fehlberg ODE Solver.
|
||||
template <typename VectorType, typename UserDataType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void k_rkf45(const int neq, const double t_stop, VectorType& y, VectorType& rwork, UserDataType& userData, CounterType& counter) const;
|
||||
|
||||
//!< Runge-Kutta-Fehlberg ODE stepper function.
|
||||
template <typename VectorType, typename UserDataType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void k_rkf45_step (const int neq, const double h, VectorType& y, VectorType& y_out,
|
||||
VectorType& rwk, UserDataType& userData) const;
|
||||
|
||||
//!< Initial step size estimation for the Runge-Kutta-Fehlberg ODE solver.
|
||||
template <typename VectorType, typename UserDataType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int k_rkf45_h0 (const int neq, const double t, const double t_stop,
|
||||
const double hmin, const double hmax,
|
||||
double& h0, VectorType& y, VectorType& rwk, UserDataType& userData) const;
|
||||
|
||||
//!< ODE Solver diagnostics.
|
||||
void odeDiagnostics(void);
|
||||
|
||||
//!< Special counters per-ode.
|
||||
int *diagnosticCounterPerODEnSteps;
|
||||
int *diagnosticCounterPerODEnFuncs;
|
||||
DAT::tdual_int_1d k_diagnosticCounterPerODEnSteps;
|
||||
DAT::tdual_int_1d k_diagnosticCounterPerODEnFuncs;
|
||||
//typename ArrayTypes<DeviceType>::t_int_1d d_diagnosticCounterPerODEnSteps;
|
||||
//typename ArrayTypes<DeviceType>::t_int_1d d_diagnosticCounterPerODEnFuncs;
|
||||
typename AT::t_int_1d d_diagnosticCounterPerODEnSteps;
|
||||
typename AT::t_int_1d d_diagnosticCounterPerODEnFuncs;
|
||||
HAT::t_int_1d h_diagnosticCounterPerODEnSteps;
|
||||
HAT::t_int_1d h_diagnosticCounterPerODEnFuncs;
|
||||
|
||||
template <typename KokkosDeviceType>
|
||||
struct KineticsType
|
||||
{
|
||||
// Arrhenius rate coefficients.
|
||||
typename ArrayTypes<KokkosDeviceType>::t_float_1d Arr, nArr, Ea;
|
||||
|
||||
// Dense versions.
|
||||
typename ArrayTypes<KokkosDeviceType>::t_float_2d stoich, stoichReactants, stoichProducts;
|
||||
|
||||
// Sparse versions.
|
||||
typename ArrayTypes<KokkosDeviceType>::t_int_2d nuk, inu;
|
||||
typename ArrayTypes<KokkosDeviceType>::t_float_2d nu;
|
||||
typename ArrayTypes<KokkosDeviceType>::t_int_1d isIntegral;
|
||||
};
|
||||
|
||||
//!< Kokkos versions of the kinetics data.
|
||||
KineticsType<LMPHostType> h_kineticsData;
|
||||
KineticsType<DeviceType> d_kineticsData;
|
||||
|
||||
bool update_kinetics_data;
|
||||
|
||||
void create_kinetics_data(void);
|
||||
|
||||
// Need a dual-view and device-view for dpdThetaLocal and sumWeights since they're used in several callbacks.
|
||||
DAT::tdual_efloat_1d k_dpdThetaLocal, k_sumWeights;
|
||||
//typename ArrayTypes<DeviceType>::t_efloat_1d d_dpdThetaLocal, d_sumWeights;
|
||||
typename AT::t_efloat_1d d_dpdThetaLocal, d_sumWeights;
|
||||
HAT::t_efloat_1d h_dpdThetaLocal, h_sumWeights;
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_x_array_randomread d_x ;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_randomread d_type ;
|
||||
typename ArrayTypes<DeviceType>::t_efloat_1d d_dpdTheta;
|
||||
|
||||
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
|
||||
//double **h_cutsq;
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_neighbors_2d d_neighbors;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d d_ilist ;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d d_numneigh ;
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_float_2d d_dvector;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d d_mask ;
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_double_1d d_scratchSpace;
|
||||
size_t scratchSpaceSize;
|
||||
|
||||
// Error flag for any failures.
|
||||
DAT::tdual_int_scalar k_error_flag;
|
||||
|
||||
template <int WT_FLAG, int LOCAL_TEMP_FLAG, bool NEWTON_PAIR, int NEIGHFLAG>
|
||||
void computeLocalTemperature();
|
||||
|
||||
int pack_reverse_comm(int, int, double *);
|
||||
void unpack_reverse_comm(int, int *, double *);
|
||||
int pack_forward_comm(int , int *, double *, int, int *);
|
||||
void unpack_forward_comm(int , int , double *);
|
||||
|
||||
//private: // replicate a few from FixRX
|
||||
int my_restartFlag;
|
||||
int nlocal;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
|
@ -0,0 +1,856 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors:
|
||||
James Larentzos (U.S. Army Research Laboratory)
|
||||
and Timothy I. Mattox (Engility Corporation)
|
||||
|
||||
Martin Lisal (Institute of Chemical Process Fundamentals
|
||||
of the Czech Academy of Sciences and J. E. Purkinje University)
|
||||
|
||||
John Brennan, Joshua Moore and William Mattson (Army Research Lab)
|
||||
|
||||
Please cite the related publications:
|
||||
J. P. Larentzos, J. K. Brennan, J. D. Moore, M. Lisal, W. D. Mattson,
|
||||
"Parallel implementation of isothermal and isoenergetic Dissipative
|
||||
Particle Dynamics using Shardlow-like splitting algorithms",
|
||||
Computer Physics Communications, 2014, 185, pp 1987--1998.
|
||||
|
||||
M. Lisal, J. K. Brennan, J. Bonet Avalos, "Dissipative particle dynamics
|
||||
at isothermal, isobaric, isoenergetic, and isoenthalpic conditions using
|
||||
Shardlow-like splitting algorithms", Journal of Chemical Physics, 2011,
|
||||
135, 204105.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "fix_shardlow_kokkos.h"
|
||||
#include "atom.h"
|
||||
#include "atom_masks.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "force.h"
|
||||
#include "update.h"
|
||||
#include "respa.h"
|
||||
#include "error.h"
|
||||
#include <math.h>
|
||||
#include "atom_vec.h"
|
||||
#include "comm.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
#include "neigh_request.h"
|
||||
#include "memory.h"
|
||||
#include "domain.h"
|
||||
#include "modify.h"
|
||||
// #include "pair_dpd_fdt.h"
|
||||
#include "pair_dpd_fdt_energy_kokkos.h"
|
||||
#include "pair.h"
|
||||
#include "npair_ssa_kokkos.h"
|
||||
#include "citeme.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace FixConst;
|
||||
|
||||
#define EPSILON 1.0e-10
|
||||
#define EPSILON_SQUARED ((EPSILON) * (EPSILON))
|
||||
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
FixShardlowKokkos<DeviceType>::FixShardlowKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
FixShardlow(lmp, narg, arg), k_pairDPDE(NULL), ghostmax(0), nlocal(0) , nghost(0)
|
||||
{
|
||||
kokkosable = 1;
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
datamask_read = EMPTY_MASK;
|
||||
datamask_modify = EMPTY_MASK;
|
||||
|
||||
if (narg != 3) error->all(FLERR,"Illegal fix shardlow command");
|
||||
|
||||
// k_pairDPD = NULL;
|
||||
k_pairDPDE = NULL;
|
||||
// k_pairDPD = (PairDPDfdtKokkos *) force->pair_match("dpd/fdt",1);
|
||||
k_pairDPDE = dynamic_cast<PairDPDfdtEnergyKokkos<DeviceType> *>(force->pair_match("dpd/fdt/energy",0));
|
||||
|
||||
// if(k_pairDPDE){
|
||||
comm_forward = 3;
|
||||
comm_reverse = 5;
|
||||
maxRNG = 0;
|
||||
#ifdef DPD_USE_RAN_MARS
|
||||
pp_random = NULL;
|
||||
#endif
|
||||
// } else {
|
||||
// comm_forward = 3;
|
||||
// comm_reverse = 3;
|
||||
// }
|
||||
|
||||
|
||||
if(/* k_pairDPD == NULL &&*/ k_pairDPDE == NULL)
|
||||
error->all(FLERR,"Must use pair_style "/*"dpd/fdt/kk or "*/"dpd/fdt/energy/kk with fix shardlow/kk");
|
||||
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
d_counters = typename AT::t_int_2d("FixShardlowKokkos::d_counters", 2, 3);
|
||||
d_hist = typename AT::t_int_1d("FixShardlowKokkos::d_hist", 32);
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_counters = Kokkos::create_mirror_view(d_counters);
|
||||
h_hist = Kokkos::create_mirror_view(d_hist);
|
||||
#else
|
||||
h_counters = d_counters;
|
||||
h_hist = d_hist;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
FixShardlowKokkos<DeviceType>::~FixShardlowKokkos()
|
||||
{
|
||||
ghostmax = 0;
|
||||
#ifdef DPD_USE_RAN_MARS
|
||||
if (pp_random) {
|
||||
for (int i = 1; i < maxRNG; ++i) delete pp_random[i];
|
||||
delete[] pp_random;
|
||||
pp_random = NULL;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int FixShardlowKokkos<DeviceType>::setmask()
|
||||
{
|
||||
int mask = 0;
|
||||
mask |= INITIAL_INTEGRATE | PRE_NEIGHBOR;
|
||||
return mask;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixShardlowKokkos<DeviceType>::init()
|
||||
{
|
||||
FixShardlow::init();
|
||||
|
||||
int irequest = neighbor->nrequest - 1;
|
||||
|
||||
neighbor->requests[irequest]->
|
||||
kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
|
||||
!Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
|
||||
neighbor->requests[irequest]->
|
||||
kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
|
||||
|
||||
// neighbor->requests[irequest]->pair = 0;
|
||||
// neighbor->requests[irequest]->fix = 1;
|
||||
// neighbor->requests[irequest]->ghost= 1;
|
||||
// neighbor->requests[irequest]->ssa = 1;
|
||||
|
||||
int ntypes = atom->ntypes;
|
||||
k_params = Kokkos::DualView<params_ssa**,Kokkos::LayoutRight,DeviceType>
|
||||
("FixShardlowKokkos::params",ntypes+1,ntypes+1);
|
||||
params = k_params.template view<DeviceType>();
|
||||
k_pairDPDE->k_cutsq.template sync<DeviceType>();
|
||||
d_cutsq = k_pairDPDE->k_cutsq.template view<DeviceType>();
|
||||
|
||||
const double boltz2 = 2.0*force->boltz;
|
||||
for (int i = 1; i <= ntypes; i++) {
|
||||
for (int j = i; j <= ntypes; j++) {
|
||||
F_FLOAT cutone = k_pairDPDE->cut[i][j];
|
||||
if (cutone > EPSILON) k_params.h_view(i,j).cutinv = 1.0/cutone;
|
||||
else k_params.h_view(i,j).cutinv = FLT_MAX;
|
||||
k_params.h_view(i,j).halfsigma = 0.5*k_pairDPDE->sigma[i][j];
|
||||
k_params.h_view(i,j).kappa = k_pairDPDE->kappa[i][j];
|
||||
k_params.h_view(i,j).alpha = sqrt(boltz2*k_pairDPDE->kappa[i][j]);
|
||||
|
||||
k_params.h_view(j,i) = k_params.h_view(i,j);
|
||||
|
||||
if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
|
||||
m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
|
||||
m_cutsq[j][i] = m_cutsq[i][j] = k_pairDPDE->k_cutsq.h_view(i,j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
k_params.template modify<LMPHostType>();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixShardlowKokkos<DeviceType>::init_list(int id, NeighList *ptr)
|
||||
{
|
||||
FixShardlow::init_list(id, ptr);
|
||||
k_list = static_cast<NeighListKokkos<DeviceType>*>(ptr);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixShardlowKokkos<DeviceType>::pre_neighbor()
|
||||
{
|
||||
// NOTE: this logic is specific to orthogonal boxes, not triclinic
|
||||
|
||||
// Enforce the constraint that ghosts must be contained in the nearest sub-domains
|
||||
double bbx = domain->subhi[0] - domain->sublo[0];
|
||||
double bby = domain->subhi[1] - domain->sublo[1];
|
||||
double bbz = domain->subhi[2] - domain->sublo[2];
|
||||
|
||||
double rcut = 2.0*neighbor->cutneighmax;
|
||||
|
||||
if (domain->triclinic)
|
||||
error->all(FLERR,"Fix shardlow does not yet support triclinic geometries");
|
||||
|
||||
if(rcut >= bbx || rcut >= bby || rcut>= bbz )
|
||||
{
|
||||
char fmt[] = {"Shardlow algorithm requires sub-domain length > 2*(rcut+skin). Either reduce the number of processors requested, or change the cutoff/skin: rcut= %e bbx= %e bby= %e bbz= %e\n"};
|
||||
char *msg = (char *) malloc(sizeof(fmt) + 4*15);
|
||||
sprintf(msg, fmt, rcut, bbx, bby, bbz);
|
||||
error->one(FLERR, msg);
|
||||
}
|
||||
|
||||
nlocal = atomKK->nlocal;
|
||||
nghost = atomKK->nghost;
|
||||
|
||||
// Allocate memory for h_v_t0 to hold the initial velocities for the ghosts
|
||||
if (nghost > ghostmax) {
|
||||
ghostmax = nghost;
|
||||
k_v_t0 = DAT::tdual_v_array("FixShardlowKokkos:v_t0", ghostmax);
|
||||
// d_v_t0 = k_v_t0.template view<DeviceType>();
|
||||
h_v_t0 = k_v_t0.h_view;
|
||||
}
|
||||
|
||||
// Setup views of relevant data
|
||||
x = atomKK->k_x.template view<DeviceType>();
|
||||
v = atomKK->k_v.template view<DeviceType>();
|
||||
h_v = atomKK->k_v.h_view;
|
||||
uCond = atomKK->k_uCond.template view<DeviceType>();
|
||||
h_uCond = atomKK->k_uCond.h_view;
|
||||
uMech = atomKK->k_uMech.template view<DeviceType>();
|
||||
h_uMech = atomKK->k_uMech.h_view;
|
||||
type = atomKK->k_type.view<DeviceType>();
|
||||
if (atomKK->rmass) {
|
||||
massPerI = true;
|
||||
masses = atomKK->k_rmass.view<DeviceType>();
|
||||
} else {
|
||||
massPerI = false;
|
||||
masses = atomKK->k_mass.view<DeviceType>();
|
||||
}
|
||||
// if(k_pairDPDE){
|
||||
dpdTheta = atomKK->k_dpdTheta.view<DeviceType>();
|
||||
|
||||
//} else {
|
||||
//}
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
void FixShardlowKokkos<DeviceType>::setup_pre_neighbor()
|
||||
{
|
||||
pre_neighbor();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
#ifdef ENABLE_KOKKOS_DPD_CONSTANT_TEMPERATURE
|
||||
#error "FixShardlowKokkos::ssa_update_dpd() is not functional yet - TIM 20170830"
|
||||
/* ----------------------------------------------------------------------
|
||||
Perform the stochastic integration and Shardlow update for constant temperature
|
||||
Allow for both per-type and per-atom mass
|
||||
|
||||
NOTE: only implemented for orthogonal boxes, not triclinic
|
||||
------------------------------------------------------------------------- */
|
||||
template<class DeviceType>
|
||||
template<bool STACKPARAMS>
|
||||
void FixShardlowKokkos<DeviceType>::ssa_update_dpd(
|
||||
int start_ii, int count, int id
|
||||
)
|
||||
{
|
||||
#ifdef DPD_USE_RAN_MARS
|
||||
class RanMars *pRNG = pp_random[id];
|
||||
#else
|
||||
rand_type rand_gen = rand_pool.get_state(id);
|
||||
#endif
|
||||
|
||||
int ct = count;
|
||||
int ii = start_ii;
|
||||
|
||||
while (ct-- > 0) {
|
||||
const int i = d_ilist(ii);
|
||||
const int jlen = d_numneigh(ii);
|
||||
|
||||
const double xtmp = x(i, 0);
|
||||
const double ytmp = x(i, 1);
|
||||
const double ztmp = x(i, 2);
|
||||
|
||||
// load velocity for i from memory
|
||||
double vxi = v(i, 0);
|
||||
double vyi = v(i, 1);
|
||||
double vzi = v(i, 2);
|
||||
|
||||
const int itype = type(i);
|
||||
|
||||
const double mass_i = masses(massPerI ? i : itype);
|
||||
const double massinv_i = 1.0 / mass_i;
|
||||
|
||||
// Loop over Directional Neighbors only
|
||||
for (int jj = 0; jj < jlen; jj++) {
|
||||
const int j = d_neighbors(ii,jj) & NEIGHMASK;
|
||||
int jtype = type[j];
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j, 0);
|
||||
const X_FLOAT dely = ytmp - x(j, 1);
|
||||
const X_FLOAT delz = ztmp - x(j, 2);
|
||||
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(0, 0)));
|
||||
else Kokkos::atomic_increment(&(d_counters(0, 1)));
|
||||
Kokkos::atomic_increment(&(d_counters(0, 2)));
|
||||
int rsqi = rsq / 8;
|
||||
if (rsqi < 0) rsqi = 0;
|
||||
else if (rsqi > 31) rsqi = 31;
|
||||
Kokkos::atomic_increment(&(d_hist(rsqi)));
|
||||
#endif
|
||||
|
||||
// NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test
|
||||
if ((rsq < (STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype)))
|
||||
&& (rsq >= EPSILON_SQUARED)) {
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(1, 0)));
|
||||
else Kokkos::atomic_increment(&(d_counters(1, 1)));
|
||||
Kokkos::atomic_increment(&(d_counters(1, 2)));
|
||||
#endif
|
||||
double r = sqrt(rsq);
|
||||
double rinv = 1.0/r;
|
||||
double delx_rinv = delx*rinv;
|
||||
double dely_rinv = dely*rinv;
|
||||
double delz_rinv = delz*rinv;
|
||||
|
||||
double wr = 1.0 - r*(STACKPARAMS?m_params[itype][jtype].cutinv:params(itype,jtype).cutinv);
|
||||
double wdt = wr*wr*dt;
|
||||
|
||||
double halfsigma_ij = STACKPARAMS?m_params[itype][jtype].halfsigma:params(itype,jtype).halfsigma;
|
||||
double halfgamma_ij = halfsigma_ij*halfsigma_ij*boltz_inv*theta_ij_inv;
|
||||
|
||||
double sigmaRand = halfsigma_ij*wr*dtsqrt*ftm2v *
|
||||
#ifdef DPD_USE_RAN_MARS
|
||||
pRNG->gaussian();
|
||||
#else
|
||||
rand_gen.normal();
|
||||
#endif
|
||||
|
||||
const double mass_j = masses(massPerI ? j : jtype);
|
||||
double massinv_j = 1.0 / mass_j;
|
||||
|
||||
double gammaFactor = halfgamma_ij*wdt*ftm2v;
|
||||
double inv_1p_mu_gammaFactor = 1.0/(1.0 + (massinv_i + massinv_j)*gammaFactor);
|
||||
|
||||
double vxj = v(j, 0);
|
||||
double vyj = v(j, 1);
|
||||
double vzj = v(j, 2);
|
||||
|
||||
// Compute the initial velocity difference between atom i and atom j
|
||||
double delvx = vxi - vxj;
|
||||
double delvy = vyi - vyj;
|
||||
double delvz = vzi - vzj;
|
||||
double dot_rinv = (delx_rinv*delvx + dely_rinv*delvy + delz_rinv*delvz);
|
||||
|
||||
// Compute momentum change between t and t+dt
|
||||
double factorA = sigmaRand - gammaFactor*dot_rinv;
|
||||
|
||||
// Update the velocity on i
|
||||
vxi += delx_rinv*factorA*massinv_i;
|
||||
vyi += dely_rinv*factorA*massinv_i;
|
||||
vzi += delz_rinv*factorA*massinv_i;
|
||||
|
||||
// Update the velocity on j
|
||||
vxj -= delx_rinv*factorA*massinv_j;
|
||||
vyj -= dely_rinv*factorA*massinv_j;
|
||||
vzj -= delz_rinv*factorA*massinv_j;
|
||||
|
||||
//ii. Compute the new velocity diff
|
||||
delvx = vxi - vxj;
|
||||
delvy = vyi - vyj;
|
||||
delvz = vzi - vzj;
|
||||
dot_rinv = delx_rinv*delvx + dely_rinv*delvy + delz_rinv*delvz;
|
||||
|
||||
// Compute the new momentum change between t and t+dt
|
||||
double factorB = (sigmaRand - gammaFactor*dot_rinv)*inv_1p_mu_gammaFactor;
|
||||
|
||||
// Update the velocity on i
|
||||
vxi += delx_rinv*factorB*massinv_i;
|
||||
vyi += dely_rinv*factorB*massinv_i;
|
||||
vzi += delz_rinv*factorB*massinv_i;
|
||||
|
||||
// Update the velocity on j
|
||||
vxj -= delx_rinv*factorB*massinv_j;
|
||||
vyj -= dely_rinv*factorB*massinv_j;
|
||||
vzj -= delz_rinv*factorB*massinv_j;
|
||||
|
||||
// Store updated velocity for j
|
||||
v(j, 0) = vxj;
|
||||
v(j, 1) = vyj;
|
||||
v(j, 2) = vzj;
|
||||
}
|
||||
}
|
||||
// store updated velocity for i
|
||||
v(i, 0) = vxi;
|
||||
v(i, 1) = vyi;
|
||||
v(i, 2) = vzi;
|
||||
}
|
||||
|
||||
#ifndef DPD_USE_RAN_MARS
|
||||
rand_pool.free_state(rand_gen);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Perform the stochastic integration and Shardlow update for constant energy
|
||||
Allow for both per-type and per-atom mass
|
||||
|
||||
NOTE: only implemented for orthogonal boxes, not triclinic
|
||||
------------------------------------------------------------------------- */
|
||||
template<class DeviceType>
|
||||
template<bool STACKPARAMS>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixShardlowKokkos<DeviceType>::ssa_update_dpde(
|
||||
int start_ii, int count, int id
|
||||
) const
|
||||
{
|
||||
#ifdef DPD_USE_RAN_MARS
|
||||
class RanMars *pRNG = pp_random[id];
|
||||
#else
|
||||
rand_type rand_gen = rand_pool.get_state(id);
|
||||
#endif
|
||||
|
||||
int ct = count;
|
||||
int ii = start_ii;
|
||||
|
||||
while (ct-- > 0) {
|
||||
const int i = d_ilist(ii);
|
||||
const int jlen = d_numneigh(ii);
|
||||
|
||||
const double xtmp = x(i, 0);
|
||||
const double ytmp = x(i, 1);
|
||||
const double ztmp = x(i, 2);
|
||||
|
||||
// load velocity for i from memory
|
||||
double vxi = v(i, 0);
|
||||
double vyi = v(i, 1);
|
||||
double vzi = v(i, 2);
|
||||
|
||||
double uMech_i = uMech(i);
|
||||
double uCond_i = uCond(i);
|
||||
const int itype = type(i);
|
||||
|
||||
const double theta_i_inv = 1.0/dpdTheta(i);
|
||||
const double mass_i = masses(massPerI ? i : itype);
|
||||
const double massinv_i = 1.0 / mass_i;
|
||||
const double mass_i_div_neg4_ftm2v = mass_i*(-0.25)/ftm2v;
|
||||
|
||||
// Loop over Directional Neighbors only
|
||||
for (int jj = 0; jj < jlen; jj++) {
|
||||
const int j = d_neighbors(ii,jj) & NEIGHMASK;
|
||||
const int jtype = type(j);
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j, 0);
|
||||
const X_FLOAT dely = ytmp - x(j, 1);
|
||||
const X_FLOAT delz = ztmp - x(j, 2);
|
||||
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(0, 0)));
|
||||
else Kokkos::atomic_increment(&(d_counters(0, 1)));
|
||||
Kokkos::atomic_increment(&(d_counters(0, 2)));
|
||||
int rsqi = rsq / 8;
|
||||
if (rsqi < 0) rsqi = 0;
|
||||
else if (rsqi > 31) rsqi = 31;
|
||||
Kokkos::atomic_increment(&(d_hist(rsqi)));
|
||||
#endif
|
||||
|
||||
// NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test
|
||||
if ((rsq < (STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype)))
|
||||
&& (rsq >= EPSILON_SQUARED)) {
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(1, 0)));
|
||||
else Kokkos::atomic_increment(&(d_counters(1, 1)));
|
||||
Kokkos::atomic_increment(&(d_counters(1, 2)));
|
||||
#endif
|
||||
|
||||
double r = sqrt(rsq);
|
||||
double rinv = 1.0/r;
|
||||
double delx_rinv = delx*rinv;
|
||||
double dely_rinv = dely*rinv;
|
||||
double delz_rinv = delz*rinv;
|
||||
|
||||
double wr = 1.0 - r*(STACKPARAMS?m_params[itype][jtype].cutinv:params(itype,jtype).cutinv);
|
||||
double wdt = wr*wr*dt;
|
||||
|
||||
// Compute the current temperature
|
||||
double theta_j_inv = 1.0/dpdTheta(j);
|
||||
double theta_ij_inv = 0.5*(theta_i_inv + theta_j_inv);
|
||||
|
||||
double halfsigma_ij = STACKPARAMS?m_params[itype][jtype].halfsigma:params(itype,jtype).halfsigma;
|
||||
double halfgamma_ij = halfsigma_ij*halfsigma_ij*boltz_inv*theta_ij_inv;
|
||||
|
||||
double sigmaRand = halfsigma_ij*wr*dtsqrt*ftm2v *
|
||||
#ifdef DPD_USE_RAN_MARS
|
||||
pRNG->gaussian();
|
||||
#else
|
||||
rand_gen.normal();
|
||||
#endif
|
||||
|
||||
const double mass_j = masses(massPerI ? j : jtype);
|
||||
double mass_ij_div_neg4_ftm2v = mass_j*mass_i_div_neg4_ftm2v;
|
||||
double massinv_j = 1.0 / mass_j;
|
||||
|
||||
// Compute uCond
|
||||
double kappa_ij = STACKPARAMS?m_params[itype][jtype].kappa:params(itype,jtype).kappa;
|
||||
double alpha_ij = STACKPARAMS?m_params[itype][jtype].alpha:params(itype,jtype).alpha;
|
||||
double del_uCond = alpha_ij*wr*dtsqrt *
|
||||
#ifdef DPD_USE_RAN_MARS
|
||||
pRNG->gaussian();
|
||||
#else
|
||||
rand_gen.normal();
|
||||
#endif
|
||||
|
||||
del_uCond += kappa_ij*(theta_i_inv - theta_j_inv)*wdt;
|
||||
uCond[j] -= del_uCond;
|
||||
uCond_i += del_uCond;
|
||||
|
||||
double gammaFactor = halfgamma_ij*wdt*ftm2v;
|
||||
double inv_1p_mu_gammaFactor = 1.0/(1.0 + (massinv_i + massinv_j)*gammaFactor);
|
||||
|
||||
double vxj = v(j, 0);
|
||||
double vyj = v(j, 1);
|
||||
double vzj = v(j, 2);
|
||||
double dot4 = vxj*vxj + vyj*vyj + vzj*vzj;
|
||||
double dot3 = vxi*vxi + vyi*vyi + vzi*vzi;
|
||||
|
||||
// Compute the initial velocity difference between atom i and atom j
|
||||
double delvx = vxi - vxj;
|
||||
double delvy = vyi - vyj;
|
||||
double delvz = vzi - vzj;
|
||||
double dot_rinv = (delx_rinv*delvx + dely_rinv*delvy + delz_rinv*delvz);
|
||||
|
||||
// Compute momentum change between t and t+dt
|
||||
double factorA = sigmaRand - gammaFactor*dot_rinv;
|
||||
|
||||
// Update the velocity on i
|
||||
vxi += delx_rinv*factorA*massinv_i;
|
||||
vyi += dely_rinv*factorA*massinv_i;
|
||||
vzi += delz_rinv*factorA*massinv_i;
|
||||
|
||||
// Update the velocity on j
|
||||
vxj -= delx_rinv*factorA*massinv_j;
|
||||
vyj -= dely_rinv*factorA*massinv_j;
|
||||
vzj -= delz_rinv*factorA*massinv_j;
|
||||
|
||||
//ii. Compute the new velocity diff
|
||||
delvx = vxi - vxj;
|
||||
delvy = vyi - vyj;
|
||||
delvz = vzi - vzj;
|
||||
dot_rinv = delx_rinv*delvx + dely_rinv*delvy + delz_rinv*delvz;
|
||||
|
||||
// Compute the new momentum change between t and t+dt
|
||||
double factorB = (sigmaRand - gammaFactor*dot_rinv)*inv_1p_mu_gammaFactor;
|
||||
|
||||
// Update the velocity on i
|
||||
vxi += delx_rinv*factorB*massinv_i;
|
||||
vyi += dely_rinv*factorB*massinv_i;
|
||||
vzi += delz_rinv*factorB*massinv_i;
|
||||
double partial_uMech = (vxi*vxi + vyi*vyi + vzi*vzi - dot3)*massinv_j;
|
||||
|
||||
// Update the velocity on j
|
||||
vxj -= delx_rinv*factorB*massinv_j;
|
||||
vyj -= dely_rinv*factorB*massinv_j;
|
||||
vzj -= delz_rinv*factorB*massinv_j;
|
||||
partial_uMech += (vxj*vxj + vyj*vyj + vzj*vzj - dot4)*massinv_i;
|
||||
|
||||
// Store updated velocity for j
|
||||
v(j, 0) = vxj;
|
||||
v(j, 1) = vyj;
|
||||
v(j, 2) = vzj;
|
||||
|
||||
// Compute uMech
|
||||
double del_uMech = partial_uMech*mass_ij_div_neg4_ftm2v;
|
||||
uMech_i += del_uMech;
|
||||
uMech(j) += del_uMech;
|
||||
}
|
||||
}
|
||||
// store updated velocity for i
|
||||
v(i, 0) = vxi;
|
||||
v(i, 1) = vyi;
|
||||
v(i, 2) = vzi;
|
||||
// store updated uMech and uCond for i
|
||||
uMech(i) = uMech_i;
|
||||
uCond(i) = uCond_i;
|
||||
ii++;
|
||||
}
|
||||
|
||||
#ifndef DPD_USE_RAN_MARS
|
||||
rand_pool.free_state(rand_gen);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
template<class DeviceType>
|
||||
void FixShardlowKokkos<DeviceType>::initial_integrate(int vflag)
|
||||
{
|
||||
d_numneigh = k_list->d_numneigh;
|
||||
d_neighbors = k_list->d_neighbors;
|
||||
d_ilist = k_list->d_ilist;
|
||||
|
||||
copymode = 1;
|
||||
|
||||
dtsqrt = sqrt(update->dt);
|
||||
|
||||
NPairSSAKokkos<DeviceType> *np_ssa = dynamic_cast<NPairSSAKokkos<DeviceType>*>(list->np);
|
||||
if (!np_ssa) error->one(FLERR, "NPair wasn't a NPairSSAKokkos object");
|
||||
ssa_phaseCt = np_ssa->ssa_phaseCt;
|
||||
ssa_phaseLen = np_ssa->ssa_phaseLen;
|
||||
ssa_itemLoc = np_ssa->ssa_itemLoc;
|
||||
ssa_itemLen = np_ssa->ssa_itemLen;
|
||||
ssa_gphaseCt = np_ssa->ssa_gphaseCt;
|
||||
ssa_gphaseLen = np_ssa->ssa_gphaseLen;
|
||||
ssa_gitemLoc = np_ssa->ssa_gitemLoc;
|
||||
ssa_gitemLen = np_ssa->ssa_gitemLen;
|
||||
|
||||
np_ssa->k_ssa_itemLoc.template sync<DeviceType>();
|
||||
np_ssa->k_ssa_itemLen.template sync<DeviceType>();
|
||||
np_ssa->k_ssa_gitemLoc.template sync<DeviceType>();
|
||||
np_ssa->k_ssa_gitemLen.template sync<DeviceType>();
|
||||
|
||||
np_ssa->k_ssa_phaseLen.template sync<LMPHostType>();
|
||||
np_ssa->k_ssa_gphaseLen.template sync<LMPHostType>();
|
||||
auto h_ssa_phaseLen = np_ssa->k_ssa_phaseLen.h_view;
|
||||
auto h_ssa_gphaseLen = np_ssa->k_ssa_gphaseLen.h_view;
|
||||
|
||||
int maxWorkItemCt = (int) ssa_itemLoc.dimension_1();
|
||||
if (maxWorkItemCt < (int) ssa_gitemLoc.dimension_1()) {
|
||||
maxWorkItemCt = (int) ssa_gitemLoc.dimension_1();
|
||||
}
|
||||
if (maxWorkItemCt > maxRNG) {
|
||||
#ifdef DPD_USE_RAN_MARS
|
||||
if (pp_random) {
|
||||
for (int i = 1; i < maxRNG; ++i) delete pp_random[i];
|
||||
delete[] pp_random;
|
||||
pp_random = NULL;
|
||||
}
|
||||
pp_random = new RanMars*[maxWorkItemCt];
|
||||
for (int i = 1; i < maxWorkItemCt; ++i) {
|
||||
pp_random[i] = new RanMars(lmp, k_pairDPDE->seed + comm->me + comm->nprocs*i);
|
||||
}
|
||||
pp_random[0] = k_pairDPDE->random;
|
||||
#else
|
||||
rand_pool.init(k_pairDPDE->seed + comm->me, maxWorkItemCt);
|
||||
#endif
|
||||
maxRNG = maxWorkItemCt;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
for (int i = 0; i < 2; ++i)
|
||||
for (int j = 0; j < 3; ++j)
|
||||
h_counters(i,j) = 0;
|
||||
for (int i = 0; i < 32; ++i) h_hist[i] = 0;
|
||||
deep_copy(d_counters, h_counters);
|
||||
deep_copy(d_hist, h_hist);
|
||||
#endif
|
||||
|
||||
//theta_ij_inv = 1.0/k_pairDPD->temperature; // independent of i,j
|
||||
boltz_inv = 1.0/force->boltz;
|
||||
ftm2v = force->ftm2v;
|
||||
dt = update->dt;
|
||||
|
||||
k_params.template sync<DeviceType>();
|
||||
|
||||
// process neighbors in the local AIR
|
||||
atomKK->sync(execution_space,X_MASK | V_MASK | TYPE_MASK | RMASS_MASK | UCOND_MASK | UMECH_MASK | DPDTHETA_MASK);
|
||||
for (workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) {
|
||||
int workItemCt = h_ssa_phaseLen[workPhase];
|
||||
|
||||
if(atom->ntypes > MAX_TYPES_STACKPARAMS)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDE<false> >(0,workItemCt),*this);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDE<true> >(0,workItemCt),*this);
|
||||
}
|
||||
atomKK->modified(execution_space,V_MASK | UCOND_MASK | UMECH_MASK);
|
||||
|
||||
//Loop over all 13 outward directions (7 stages)
|
||||
for (workPhase = 0; workPhase < ssa_gphaseCt; ++workPhase) {
|
||||
// int airnum = workPhase + 1;
|
||||
int workItemCt = h_ssa_gphaseLen[workPhase];
|
||||
|
||||
// Communicate the updated velocities to all nodes
|
||||
atomKK->sync(Host,V_MASK);
|
||||
comm->forward_comm_fix(this);
|
||||
atomKK->modified(Host,V_MASK);
|
||||
|
||||
if(k_pairDPDE){
|
||||
// Zero out the ghosts' uCond & uMech to be used as delta accumulators
|
||||
// memset(&(atom->uCond[nlocal]), 0, sizeof(double)*nghost);
|
||||
// memset(&(atom->uMech[nlocal]), 0, sizeof(double)*nghost);
|
||||
|
||||
// must capture local variables, not class variables
|
||||
atomKK->sync(execution_space,UCOND_MASK | UMECH_MASK);
|
||||
auto l_uCond = uCond;
|
||||
auto l_uMech = uMech;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType>(nlocal,nlocal+nghost), LAMMPS_LAMBDA (const int i) {
|
||||
l_uCond(i) = 0.0;
|
||||
l_uMech(i) = 0.0;
|
||||
});
|
||||
atomKK->modified(execution_space,UCOND_MASK | UMECH_MASK);
|
||||
}
|
||||
|
||||
// process neighbors in this AIR
|
||||
atomKK->sync(execution_space,X_MASK | V_MASK | TYPE_MASK | RMASS_MASK | UCOND_MASK | UMECH_MASK | DPDTHETA_MASK);
|
||||
if(atom->ntypes > MAX_TYPES_STACKPARAMS)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDEGhost<false> >(0,workItemCt),*this);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDEGhost<true> >(0,workItemCt),*this);
|
||||
atomKK->modified(execution_space,V_MASK | UCOND_MASK | UMECH_MASK);
|
||||
|
||||
// Communicate the ghost deltas to the atom owners
|
||||
atomKK->sync(Host,V_MASK | UCOND_MASK | UMECH_MASK);
|
||||
comm->reverse_comm_fix(this);
|
||||
atomKK->modified(Host,V_MASK | UCOND_MASK | UMECH_MASK);
|
||||
|
||||
} //End Loop over all directions For airnum = Top, Top-Right, Right, Bottom-Right, Back
|
||||
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
deep_copy(h_counters, d_counters);
|
||||
deep_copy(h_hist, d_hist);
|
||||
for (int i = 0; i < 32; ++i) fprintf(stdout, "%8d", h_hist[i]);
|
||||
fprintf(stdout, "\n%6d %6d,%6d %6d: "
|
||||
,h_counters(0, 2)
|
||||
,h_counters(1, 2)
|
||||
,h_counters(0, 1)
|
||||
,h_counters(1, 1)
|
||||
);
|
||||
#endif
|
||||
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<bool STACKPARAMS>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixShardlowKokkos<DeviceType>::operator()(TagFixShardlowSSAUpdateDPDE<STACKPARAMS>, const int &workItem) const {
|
||||
const int ct = ssa_itemLen(workPhase, workItem);
|
||||
const int ii = ssa_itemLoc(workPhase, workItem);
|
||||
ssa_update_dpde<STACKPARAMS>(ii, ct, workItem);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<bool STACKPARAMS>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixShardlowKokkos<DeviceType>::operator()(TagFixShardlowSSAUpdateDPDEGhost<STACKPARAMS>, const int &workItem) const {
|
||||
const int ct = ssa_gitemLen(workPhase, workItem);
|
||||
const int ii = ssa_gitemLoc(workPhase, workItem);
|
||||
ssa_update_dpde<STACKPARAMS>(ii, ct, workItem);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int FixShardlowKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc)
|
||||
{
|
||||
int ii,jj,m;
|
||||
|
||||
m = 0;
|
||||
for (ii = 0; ii < n; ii++) {
|
||||
jj = list[ii];
|
||||
buf[m++] = h_v(jj, 0);
|
||||
buf[m++] = h_v(jj, 1);
|
||||
buf[m++] = h_v(jj, 2);
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixShardlowKokkos<DeviceType>::unpack_forward_comm(int n, int first, double *buf)
|
||||
{
|
||||
int ii,m,last;
|
||||
|
||||
m = 0;
|
||||
last = first + n ;
|
||||
for (ii = first; ii < last; ii++) {
|
||||
h_v_t0(ii - nlocal, 0) = h_v(ii, 0) = buf[m++];
|
||||
h_v_t0(ii - nlocal, 1) = h_v(ii, 1) = buf[m++];
|
||||
h_v_t0(ii - nlocal, 2) = h_v(ii, 2) = buf[m++];
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int FixShardlowKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *buf)
|
||||
{
|
||||
int i,m,last;
|
||||
|
||||
m = 0;
|
||||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
buf[m++] = h_v(i, 0) - h_v_t0(i - nlocal, 0);
|
||||
buf[m++] = h_v(i, 1) - h_v_t0(i - nlocal, 1);
|
||||
buf[m++] = h_v(i, 2) - h_v_t0(i - nlocal, 2);
|
||||
if(k_pairDPDE){
|
||||
buf[m++] = h_uCond(i); // for ghosts, this is an accumulated delta
|
||||
buf[m++] = h_uMech(i); // for ghosts, this is an accumulated delta
|
||||
}
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixShardlowKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double *buf)
|
||||
{
|
||||
int i,j,m;
|
||||
|
||||
m = 0;
|
||||
for (i = 0; i < n; i++) {
|
||||
j = list[i];
|
||||
|
||||
h_v(j, 0) += buf[m++];
|
||||
h_v(j, 1) += buf[m++];
|
||||
h_v(j, 2) += buf[m++];
|
||||
if(k_pairDPDE){
|
||||
h_uCond(j) += buf[m++]; // add in the accumulated delta
|
||||
h_uMech(j) += buf[m++]; // add in the accumulated delta
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
double FixShardlowKokkos<DeviceType>::memory_usage()
|
||||
{
|
||||
double bytes = 0.0;
|
||||
bytes += sizeof(double)*3*ghostmax; // v_t0[]
|
||||
return bytes;
|
||||
}
|
||||
|
||||
// Explicit instantiations of the fix for the Kokkos device types.
namespace LAMMPS_NS {
|
||||
// always instantiated
template class FixShardlowKokkos<LMPDeviceType>;
|
||||
// the host variant is instantiated only when KOKKOS_HAVE_CUDA is defined
// (presumably because the host and device types coincide otherwise - TODO confirm)
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class FixShardlowKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
|
@ -0,0 +1,196 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(shardlow/kk,FixShardlowKokkos<LMPDeviceType>)
|
||||
FixStyle(shardlow/kk/device,FixShardlowKokkos<LMPDeviceType>)
|
||||
FixStyle(shardlow/kk/host,FixShardlowKokkos<LMPHostType>)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_SHARDLOW_KOKKOS_H
|
||||
#define LMP_FIX_SHARDLOW_KOKKOS_H
|
||||
|
||||
#include "float.h"
|
||||
#include "fix_shardlow.h"
|
||||
#include "kokkos_type.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
#ifdef ENABLE_KOKKOS_DPD_CONSTANT_TEMPERATURE
|
||||
#include "pair_dpd_fdt_kokkos.h"
|
||||
#endif
|
||||
#include "pair_dpd_fdt_energy_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<bool STACKPARAMS>
|
||||
struct TagFixShardlowSSAUpdateDPDE{};
|
||||
|
||||
template<bool STACKPARAMS>
|
||||
struct TagFixShardlowSSAUpdateDPDEGhost{};
|
||||
|
||||
template<class DeviceType>
|
||||
class FixShardlowKokkos : public FixShardlow {
|
||||
public:
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
NeighListKokkos<DeviceType> *k_list; // The SSA specific neighbor list
|
||||
|
||||
FixShardlowKokkos(class LAMMPS *, int, char **);
|
||||
~FixShardlowKokkos();
|
||||
int setmask();
|
||||
virtual void init();
|
||||
virtual void init_list(int, class NeighList *);
|
||||
virtual void initial_integrate(int);
|
||||
void setup_pre_neighbor();
|
||||
void pre_neighbor();
|
||||
|
||||
double memory_usage();
|
||||
|
||||
int pack_reverse_comm(int, int, double *);
|
||||
void unpack_reverse_comm(int, int *, double *);
|
||||
int pack_forward_comm(int , int *, double *, int, int *);
|
||||
void unpack_forward_comm(int , int , double *);
|
||||
|
||||
struct params_ssa {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
params_ssa(){cutinv=FLT_MAX;halfsigma=0;kappa=0;alpha=0;};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
params_ssa(int i){cutinv=FLT_MAX;halfsigma=0;kappa=0;alpha=0;};
|
||||
F_FLOAT cutinv,halfsigma,kappa,alpha;
|
||||
};
|
||||
|
||||
template<bool STACKPARAMS>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagFixShardlowSSAUpdateDPDE<STACKPARAMS>, const int&) const;
|
||||
|
||||
template<bool STACKPARAMS>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagFixShardlowSSAUpdateDPDEGhost<STACKPARAMS>, const int&) const;
|
||||
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
typename AT::t_int_2d d_counters;
|
||||
typename HAT::t_int_2d h_counters;
|
||||
typename AT::t_int_1d d_hist;
|
||||
typename HAT::t_int_1d h_hist;
|
||||
#endif
|
||||
|
||||
protected:
|
||||
int workPhase;
|
||||
double theta_ij_inv,boltz_inv,ftm2v,dt;
|
||||
|
||||
#ifdef ENABLE_KOKKOS_DPD_CONSTANT_TEMPERATURE
|
||||
// class PairDPDfdt *pairDPD; FIXME as per k_pairDPDE below
|
||||
#endif
|
||||
PairDPDfdtEnergyKokkos<DeviceType> *k_pairDPDE;
|
||||
|
||||
int maxRNG;
|
||||
#ifdef DPD_USE_RAN_MARS
|
||||
class RanMars **pp_random;
|
||||
#elif defined(DPD_USE_Random_XorShift1024)
|
||||
Kokkos::Random_XorShift1024_Pool<DeviceType> rand_pool;
|
||||
typedef typename Kokkos::Random_XorShift1024_Pool<DeviceType>::generator_type rand_type;
|
||||
#else
|
||||
Kokkos::Random_XorShift64_Pool<DeviceType> rand_pool;
|
||||
typedef typename Kokkos::Random_XorShift64_Pool<DeviceType>::generator_type rand_type;
|
||||
#endif
|
||||
|
||||
Kokkos::DualView<params_ssa**,Kokkos::LayoutRight,DeviceType> k_params;
|
||||
typename Kokkos::DualView<params_ssa**,
|
||||
Kokkos::LayoutRight,DeviceType>::t_dev_const_um params;
|
||||
// hardwired to space for MAX_TYPES_STACKPARAMS (12) atom types
|
||||
params_ssa m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
|
||||
|
||||
F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
|
||||
|
||||
typename DAT::tdual_v_array k_v_t0;
|
||||
// typename AT::t_v_array d_v_t0; v_t0 only used in comm routines (on host)
|
||||
typename HAT::t_v_array h_v_t0;
|
||||
|
||||
typename AT::t_x_array x;
|
||||
typename AT::t_v_array v;
|
||||
typename HAT::t_v_array h_v;
|
||||
typename AT::t_efloat_1d uCond, uMech;
|
||||
typename HAT::t_efloat_1d h_uCond, h_uMech;
|
||||
typename AT::t_int_1d type;
|
||||
bool massPerI;
|
||||
typename AT::t_float_1d_randomread masses;
|
||||
typename AT::t_efloat_1d dpdTheta;
|
||||
|
||||
double dtsqrt; // = sqrt(update->dt);
|
||||
int ghostmax;
|
||||
int nlocal, nghost;
|
||||
|
||||
typename AT::t_neighbors_2d d_neighbors;
|
||||
typename AT::t_int_1d_randomread d_ilist, d_numneigh;
|
||||
|
||||
int ssa_phaseCt;
|
||||
typename AT::t_int_1d ssa_phaseLen;
|
||||
typename AT::t_int_2d ssa_itemLoc, ssa_itemLen;
|
||||
|
||||
int ssa_gphaseCt;
|
||||
typename AT::t_int_1d ssa_gphaseLen;
|
||||
typename AT::t_int_2d ssa_gitemLoc, ssa_gitemLen;
|
||||
|
||||
|
||||
#ifdef ENABLE_KOKKOS_DPD_CONSTANT_TEMPERATURE
|
||||
template<bool STACKPARAMS>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void ssa_update_dpd(int, int, int) const; // Constant Temperature
|
||||
#endif
|
||||
template<bool STACKPARAMS>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void ssa_update_dpde(int, int, int) const; // Constant Energy
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Illegal ... command
|
||||
|
||||
Self-explanatory. Check the input script syntax and compare to the
|
||||
documentation for the command. You can use -echo screen as a
|
||||
command-line option when running LAMMPS to see the offending line.
|
||||
|
||||
E: Must use dpd/fdt pair_style with fix shardlow
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Must use pair_style dpd/fdt or dpd/fdt/energy with fix shardlow
|
||||
|
||||
E: A deterministic integrator must be specified after fix shardlow in input
|
||||
file (e.g. fix nve or fix nph).
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Cannot use constant temperature integration routines with DPD
|
||||
|
||||
Self-explanatory. Must use deterministic integrators such as nve or nph
|
||||
|
||||
E: Fix shardlow does not yet support triclinic geometries
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Shardlow algorithm requires sub-domain length > 2*(rcut+skin). Either
|
||||
reduce the number of processors requested, or change the cutoff/skin
|
||||
|
||||
The Shardlow splitting algorithm requires the size of the sub-domain lengths
|
||||
to be larger than twice the cutoff+skin. Generally, the domain decomposition
|
||||
is dependent on the number of processors requested.
|
||||
|
||||
*/
|
|
@ -0,0 +1,103 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <math.h>
|
||||
#include "fix_wall_lj93_kokkos.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "error.h"
|
||||
#include "atom_masks.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace FixConst;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class DeviceType>
|
||||
FixWallLJ93Kokkos<DeviceType>::FixWallLJ93Kokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
FixWallLJ93(lmp, narg, arg)
|
||||
{
|
||||
kokkosable = 1;
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
datamask_read = EMPTY_MASK;
|
||||
datamask_modify = EMPTY_MASK;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
interaction of all particles in group with a wall
|
||||
m = index of wall coeffs
|
||||
which = xlo,xhi,ylo,yhi,zlo,zhi
|
||||
error if any particle is on or behind wall
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template <class DeviceType>
|
||||
void FixWallLJ93Kokkos<DeviceType>::wall_particle(int m_in, int which, double coord_in)
|
||||
{
|
||||
m = m_in;
|
||||
coord = coord_in;
|
||||
|
||||
atomKK->sync(execution_space, X_MASK|F_MASK|MASK_MASK);
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
f = atomKK->k_f.view<DeviceType>();
|
||||
mask = atomKK->k_mask.view<DeviceType>();
|
||||
DAT::tdual_int_scalar k_oneflag = DAT::tdual_int_scalar("fix:oneflag");
|
||||
d_oneflag = k_oneflag.view<DeviceType>();
|
||||
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
dim = which / 2;
|
||||
side = which % 2;
|
||||
if (side == 0) side = -1;
|
||||
|
||||
copymode = 1;
|
||||
FixWallLJ93KokkosFunctor<DeviceType> wp_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,wp_functor,ewall);
|
||||
copymode = 0;
|
||||
|
||||
atomKK->modified(execution_space, F_MASK);
|
||||
|
||||
k_oneflag.template modify<DeviceType>();
|
||||
k_oneflag.template sync<LMPHostType>();
|
||||
if (k_oneflag.h_view()) error->one(FLERR,"Particle on or inside fix wall surface");
|
||||
}
|
||||
|
||||
template <class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixWallLJ93Kokkos<DeviceType>::wall_particle_item(int i, value_type ewall) const {
|
||||
if (mask(i) & groupbit) {
|
||||
double delta;
|
||||
if (side < 0) delta = x(i,dim) - coord;
|
||||
else delta = coord - x(i,dim);
|
||||
if (delta >= cutoff[m]) return;
|
||||
if (delta <= 0.0) {
|
||||
d_oneflag() = 1;
|
||||
return;
|
||||
}
|
||||
double rinv = 1.0/delta;
|
||||
double r2inv = rinv*rinv;
|
||||
double r4inv = r2inv*r2inv;
|
||||
double r10inv = r4inv*r4inv*r2inv;
|
||||
double fwall = side * (coeff1[m]*r10inv - coeff2[m]*r4inv);
|
||||
f(i,dim) -= fwall;
|
||||
ewall[0] += coeff3[m]*r4inv*r4inv*rinv -
|
||||
coeff4[m]*r2inv*rinv - offset[m];
|
||||
ewall[m+1] += fwall;
|
||||
}
|
||||
}
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class FixWallLJ93Kokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class FixWallLJ93Kokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
|
@ -0,0 +1,83 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(wall/lj93/kk,FixWallLJ93Kokkos<LMPDeviceType>)
|
||||
FixStyle(wall/lj93/kk/device,FixWallLJ93Kokkos<LMPDeviceType>)
|
||||
FixStyle(wall/lj93/kk/host,FixWallLJ93Kokkos<LMPHostType>)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_WALL_LJ93_KOKKOS_H
|
||||
#define LMP_FIX_WALL_LJ93_KOKKOS_H
|
||||
|
||||
#include "fix_wall_lj93.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template <class DeviceType>
|
||||
class FixWallLJ93Kokkos : public FixWallLJ93 {
|
||||
public:
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
typedef double value_type[];
|
||||
|
||||
FixWallLJ93Kokkos(class LAMMPS *, int, char **);
|
||||
void wall_particle(int, int, double);
|
||||
|
||||
int m;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void wall_particle_item(int, value_type) const;
|
||||
|
||||
private:
|
||||
int dim,side;
|
||||
double coord;
|
||||
|
||||
typename AT::t_x_array x;
|
||||
typename AT::t_f_array f;
|
||||
typename AT::t_int_1d mask;
|
||||
typename AT::t_int_scalar d_oneflag;
|
||||
};
|
||||
|
||||
template <class DeviceType>
|
||||
struct FixWallLJ93KokkosFunctor {
|
||||
typedef DeviceType device_type ;
|
||||
typedef double value_type[];
|
||||
const int value_count;
|
||||
|
||||
FixWallLJ93Kokkos<DeviceType> c;
|
||||
FixWallLJ93KokkosFunctor(FixWallLJ93Kokkos<DeviceType>* c_ptr):
|
||||
c(*c_ptr),
|
||||
value_count(c_ptr->m+1) {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i, value_type ewall) const {
|
||||
c.wall_particle_item(i,ewall);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Particle on or inside fix wall surface
|
||||
|
||||
Particles must be "exterior" to the wall in order for energy/force to
|
||||
be calculated.
|
||||
|
||||
*/
|
|
@ -95,7 +95,6 @@ void NBinKokkos<DeviceType>::bin_atoms()
|
|||
MemsetZeroFunctor<DeviceType> f_zero;
|
||||
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
|
||||
Kokkos::parallel_for(mbins, f_zero);
|
||||
DeviceType::fence();
|
||||
|
||||
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
|
@ -106,7 +105,6 @@ void NBinKokkos<DeviceType>::bin_atoms()
|
|||
NPairKokkosBinAtomsFunctor<DeviceType> f(*this);
|
||||
|
||||
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
|
||||
DeviceType::fence();
|
||||
|
||||
deep_copy(h_resize, d_resize);
|
||||
if(h_resize()) {
|
||||
|
|
|
@ -0,0 +1,307 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors:
|
||||
James Larentzos (ARL) and Timothy I. Mattox (Engility Corporation)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "nbin_ssa_kokkos.h"
|
||||
#include "neighbor.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "group.h"
|
||||
#include "domain.h"
|
||||
#include "comm.h"
|
||||
#include "update.h"
|
||||
#include "error.h"
|
||||
#include "atom_masks.h"
|
||||
|
||||
// #include "memory.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
NBinSSAKokkos<DeviceType>::NBinSSAKokkos(LAMMPS *lmp) : NBinStandard(lmp)
|
||||
{
|
||||
atoms_per_bin = ghosts_per_gbin = 16;
|
||||
|
||||
d_resize = typename AT::t_int_scalar("NBinSSAKokkos::d_resize");
|
||||
d_lbinxlo = typename AT::t_int_scalar("NBinSSAKokkos::d_lbinxlo");
|
||||
d_lbinylo = typename AT::t_int_scalar("NBinSSAKokkos::d_lbinylo");
|
||||
d_lbinzlo = typename AT::t_int_scalar("NBinSSAKokkos::d_lbinzlo");
|
||||
d_lbinxhi = typename AT::t_int_scalar("NBinSSAKokkos::d_lbinxhi");
|
||||
d_lbinyhi = typename AT::t_int_scalar("NBinSSAKokkos::d_lbinyhi");
|
||||
d_lbinzhi = typename AT::t_int_scalar("NBinSSAKokkos::d_lbinzhi");
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_resize = Kokkos::create_mirror_view(d_resize);
|
||||
h_lbinxlo = Kokkos::create_mirror_view(d_lbinxlo);
|
||||
h_lbinylo = Kokkos::create_mirror_view(d_lbinylo);
|
||||
h_lbinzlo = Kokkos::create_mirror_view(d_lbinzlo);
|
||||
h_lbinxhi = Kokkos::create_mirror_view(d_lbinxhi);
|
||||
h_lbinyhi = Kokkos::create_mirror_view(d_lbinyhi);
|
||||
h_lbinzhi = Kokkos::create_mirror_view(d_lbinzhi);
|
||||
#else
|
||||
h_resize = d_resize;
|
||||
h_lbinxlo = d_lbinxlo;
|
||||
h_lbinylo = d_lbinylo;
|
||||
h_lbinzlo = d_lbinzlo;
|
||||
h_lbinxhi = d_lbinxhi;
|
||||
h_lbinyhi = d_lbinyhi;
|
||||
h_lbinzhi = d_lbinzhi;
|
||||
#endif
|
||||
h_resize() = 1;
|
||||
|
||||
k_gbincount = DAT::tdual_int_1d("NBinSSAKokkos::gbincount",8);
|
||||
gbincount = k_gbincount.view<DeviceType>();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void NBinSSAKokkos<DeviceType>::bin_atoms_setup(int nall)
|
||||
{
|
||||
if (mbins > (int) k_bins.h_view.dimension_0()) {
|
||||
k_bins = DAT::tdual_int_2d("NBinSSAKokkos::bins",mbins,atoms_per_bin);
|
||||
bins = k_bins.view<DeviceType>();
|
||||
|
||||
k_bincount = DAT::tdual_int_1d("NBinSSAKokkos::bincount",mbins);
|
||||
bincount = k_bincount.view<DeviceType>();
|
||||
}
|
||||
|
||||
ghosts_per_gbin = atom->nghost / 7; // estimate needed size
|
||||
|
||||
if (ghosts_per_gbin > (int) k_gbins.h_view.dimension_1()) {
|
||||
k_gbins = DAT::tdual_int_2d("NBinSSAKokkos::gbins",8,ghosts_per_gbin);
|
||||
gbins = k_gbins.view<DeviceType>();
|
||||
}
|
||||
|
||||
// Clear the local bin extent bounding box.
|
||||
h_lbinxlo() = mbinx - 1; // Safe to = stencil->sx + 1
|
||||
h_lbinylo() = mbiny - 1; // Safe to = stencil->sy + 1
|
||||
h_lbinzlo() = mbinz - 1; // Safe to = stencil->sz + 1
|
||||
h_lbinxhi() = 0; // Safe to = mbinx - stencil->sx - 1
|
||||
h_lbinyhi() = 0; // Safe to = mbiny - stencil->sy - 1
|
||||
h_lbinzhi() = 0; // Safe to = mbinz - stencil->sz - 1
|
||||
deep_copy(d_lbinxlo, h_lbinxlo);
|
||||
deep_copy(d_lbinylo, h_lbinylo);
|
||||
deep_copy(d_lbinzlo, h_lbinzlo);
|
||||
deep_copy(d_lbinxhi, h_lbinxhi);
|
||||
deep_copy(d_lbinyhi, h_lbinyhi);
|
||||
deep_copy(d_lbinzhi, h_lbinzhi);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
bin owned and ghost atoms for the Shardlow Splitting Algorithm (SSA)
|
||||
local atoms are in distinct bins (binhead[]) from the ghosts
|
||||
ghost atoms are "binned" in gairhead_ssa[] instead
|
||||
ghosts which are not in an Active Interaction Region (AIR) are skipped
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void NBinSSAKokkos<DeviceType>::bin_atoms()
|
||||
{
|
||||
last_bin = update->ntimestep;
|
||||
|
||||
int nlocal = atom->nlocal;
|
||||
int nghost = atom->nghost;
|
||||
int nall = nlocal + nghost;
|
||||
|
||||
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
|
||||
sublo_[0] = domain->sublo[0];
|
||||
sublo_[1] = domain->sublo[1];
|
||||
sublo_[2] = domain->sublo[2];
|
||||
subhi_[0] = domain->subhi[0];
|
||||
subhi_[1] = domain->subhi[1];
|
||||
subhi_[2] = domain->subhi[2];
|
||||
|
||||
bboxlo_[0] = bboxlo[0]; bboxlo_[1] = bboxlo[1]; bboxlo_[2] = bboxlo[2];
|
||||
bboxhi_[0] = bboxhi[0]; bboxhi_[1] = bboxhi[1]; bboxhi_[2] = bboxhi[2];
|
||||
|
||||
k_binID = DAT::tdual_int_1d("NBinSSAKokkos::binID",nall);
|
||||
binID = k_binID.view<DeviceType>();
|
||||
|
||||
// find each local atom's binID
|
||||
{
|
||||
atoms_per_bin = 0;
|
||||
NPairSSAKokkosBinIDAtomsFunctor<DeviceType> f(*this);
|
||||
Kokkos::parallel_reduce(nlocal, f, atoms_per_bin);
|
||||
}
|
||||
deep_copy(h_lbinxlo, d_lbinxlo);
|
||||
deep_copy(h_lbinylo, d_lbinylo);
|
||||
deep_copy(h_lbinzlo, d_lbinzlo);
|
||||
deep_copy(h_lbinxhi, d_lbinxhi);
|
||||
deep_copy(h_lbinyhi, d_lbinyhi);
|
||||
deep_copy(h_lbinzhi, d_lbinzhi);
|
||||
|
||||
// find each ghost's binID (AIR number)
|
||||
{
|
||||
for (int i = 0; i < 8; i++) k_gbincount.h_view(i) = 0;
|
||||
k_gbincount.modify<LMPHostType>();
|
||||
k_gbincount.sync<DeviceType>();
|
||||
ghosts_per_gbin = 0;
|
||||
NPairSSAKokkosBinIDGhostsFunctor<DeviceType> f(*this);
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<LMPDeviceType>(nlocal,nall), f, ghosts_per_gbin);
|
||||
}
|
||||
|
||||
// actually bin the ghost atoms
|
||||
{
|
||||
if(ghosts_per_gbin > (int) gbins.dimension_1()) {
|
||||
k_gbins = DAT::tdual_int_2d("gbins", 8, ghosts_per_gbin);
|
||||
gbins = k_gbins.view<DeviceType>();
|
||||
}
|
||||
for (int i = 0; i < 8; i++) k_gbincount.h_view(i) = 0;
|
||||
k_gbincount.modify<LMPHostType>();
|
||||
k_gbincount.sync<DeviceType>();
|
||||
|
||||
auto binID_ = binID;
|
||||
auto gbincount_ = gbincount;
|
||||
auto gbins_ = gbins;
|
||||
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType>(nlocal,nall),
|
||||
LAMMPS_LAMBDA (const int i) {
|
||||
const int iAIR = binID_(i);
|
||||
if (iAIR > 0) { // include only ghost atoms in an AIR
|
||||
const int ac = Kokkos::atomic_fetch_add(&gbincount_[iAIR], (int)1);
|
||||
gbins_(iAIR, ac) = i;
|
||||
}
|
||||
});
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType>(1,8),
|
||||
LAMMPS_LAMBDA (const int i) {
|
||||
sortBin(gbincount_, gbins_, i);
|
||||
});
|
||||
}
|
||||
c_gbins = gbins; // gbins won't change until the next bin_atoms
|
||||
|
||||
// actually bin the local atoms
|
||||
{
|
||||
if ((mbins > (int) bins.dimension_0()) ||
|
||||
(atoms_per_bin > (int) bins.dimension_1())) {
|
||||
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
|
||||
bins = k_bins.view<DeviceType>();
|
||||
}
|
||||
MemsetZeroFunctor<DeviceType> f_zero;
|
||||
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
|
||||
Kokkos::parallel_for(mbins, f_zero);
|
||||
|
||||
auto bincount_ = bincount;
|
||||
auto bins_ = bins;
|
||||
|
||||
NPairSSAKokkosBinAtomsFunctor<DeviceType> f(*this);
|
||||
Kokkos::parallel_for(nlocal, f);
|
||||
|
||||
Kokkos::parallel_for(mbins,
|
||||
LAMMPS_LAMBDA (const int i) {
|
||||
sortBin(bincount_, bins_, i);
|
||||
});
|
||||
}
|
||||
k_bins.modify<DeviceType>();
|
||||
k_bincount.modify<DeviceType>();
|
||||
c_bins = bins; // bins won't change until the next bin_atoms
|
||||
|
||||
k_gbins.modify<DeviceType>();
|
||||
k_gbincount.modify<DeviceType>();
|
||||
|
||||
//now dispose of the k_binID array
|
||||
k_binID = DAT::tdual_int_1d("NBinSSAKokkos::binID",0);
|
||||
binID = k_binID.view<DeviceType>();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void NBinSSAKokkos<DeviceType>::binAtomsItem(const int &i) const
|
||||
{
|
||||
const int ibin = binID(i);
|
||||
const int ac = Kokkos::atomic_fetch_add(&(bincount[ibin]), (int)1);
|
||||
bins(ibin, ac) = i;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void NBinSSAKokkos<DeviceType>::binIDAtomsItem(const int &i, int &update) const
|
||||
{
|
||||
int loc[3];
|
||||
const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2), &(loc[0]));
|
||||
binID(i) = ibin;
|
||||
|
||||
// Find the bounding box of the local atoms in the bins
|
||||
if (loc[0] < d_lbinxlo()) Kokkos::atomic_fetch_min(&d_lbinxlo(),loc[0]);
|
||||
if (loc[0] >= d_lbinxhi()) Kokkos::atomic_fetch_max(&d_lbinxhi(),loc[0] + 1);
|
||||
if (loc[1] < d_lbinylo()) Kokkos::atomic_fetch_min(&d_lbinylo(),loc[1]);
|
||||
if (loc[1] >= d_lbinyhi()) Kokkos::atomic_fetch_max(&d_lbinyhi(),loc[1] + 1);
|
||||
if (loc[2] < d_lbinzlo()) Kokkos::atomic_fetch_min(&d_lbinzlo(),loc[2]);
|
||||
if (loc[2] >= d_lbinzhi()) Kokkos::atomic_fetch_max(&d_lbinzhi(),loc[2] + 1);
|
||||
|
||||
const int ac = Kokkos::atomic_fetch_add(&(bincount[ibin]), (int)1);
|
||||
if (update <= ac) update = ac + 1;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void NBinSSAKokkos<DeviceType>::binIDGhostsItem(const int &i, int &update) const
|
||||
{
|
||||
const int iAIR = coord2ssaAIR(x(i, 0), x(i, 1), x(i, 2));
|
||||
binID(i) = iAIR;
|
||||
if (iAIR > 0) { // include only ghost atoms in an AIR
|
||||
const int ac = Kokkos::atomic_fetch_add(&gbincount[iAIR], (int)1);
|
||||
if (update <= ac) update = ac + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// An implementation of heapsort without recursion
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void NBinSSAKokkos<DeviceType>::sortBin(
|
||||
typename AT::t_int_1d gbincount,
|
||||
typename AT::t_int_2d gbins,
|
||||
const int &ibin)
|
||||
{
|
||||
int n = gbincount(ibin);
|
||||
int i = n/2;
|
||||
int t;
|
||||
|
||||
do { /* Loops until bin is sorted */
|
||||
if (i > 0) { /* First stage - Sorting the heap */
|
||||
i--; /* Save its index to i */
|
||||
t = gbins(ibin, i); /* Save parent value to t */
|
||||
} else { /* Second stage - Extracting elements in-place */
|
||||
if ((--n) <= 0) return; /* When the heap is empty, we are done */
|
||||
t = gbins(ibin, n); /* Save last value (it will be overwritten) */
|
||||
gbins(ibin, n) = gbins(ibin, 0); /* Save largest value at the end of the bin */
|
||||
}
|
||||
int parent = i; /* We will start pushing down t from parent */
|
||||
int child = i*2 + 1; /* parent's left child */
|
||||
/* Sift operation - pushing the value of t down the heap */
|
||||
while (child < n) {
|
||||
/* Choose the largest child */
|
||||
if ((child + 1 < n) && (gbins(ibin, child + 1) > gbins(ibin, child))) ++child;
|
||||
if (gbins(ibin, child) <= t) break; /* t's place is found */
|
||||
gbins(ibin, parent) = gbins(ibin, child); /* Move the largest child up */
|
||||
parent = child; /* Move parent pointer to this child */
|
||||
child = parent*2+1; /* Find the next child */
|
||||
}
|
||||
gbins(ibin, parent) = t; /* We save t in the heap */
|
||||
} while(1);
|
||||
}
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class NBinSSAKokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class NBinSSAKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
|
@ -0,0 +1,246 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NBIN_CLASS
|
||||
|
||||
NBinStyle(ssa/kk/host,
|
||||
NBinSSAKokkos<LMPHostType>,
|
||||
NB_SSA | NB_KOKKOS_HOST)
|
||||
|
||||
NBinStyle(ssa/kk/device,
|
||||
NBinSSAKokkos<LMPDeviceType>,
|
||||
NB_SSA | NB_KOKKOS_DEVICE)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_NBIN_SSA_KOKKOS_H
|
||||
#define LMP_NBIN_SSA_KOKKOS_H
|
||||
|
||||
#include "nbin_standard.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class DeviceType>
|
||||
class NBinSSAKokkos : public NBinStandard {
|
||||
public:
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
NBinSSAKokkos(class LAMMPS *);
|
||||
~NBinSSAKokkos() {}
|
||||
void bin_atoms_setup(int);
|
||||
void bin_atoms();
|
||||
|
||||
// temporary array to hold the binID for each atom
|
||||
DAT::tdual_int_1d k_binID;
|
||||
typename AT::t_int_1d binID;
|
||||
typename AT::t_int_1d_const c_binID;
|
||||
|
||||
int atoms_per_bin;
|
||||
DAT::tdual_int_1d k_bincount;
|
||||
DAT::tdual_int_2d k_bins;
|
||||
typename AT::t_int_1d bincount;
|
||||
typename AT::t_int_2d bins;
|
||||
typename AT::t_int_2d_const c_bins;
|
||||
|
||||
int ghosts_per_gbin;
|
||||
DAT::tdual_int_1d k_gbincount;
|
||||
DAT::tdual_int_2d k_gbins;
|
||||
typename AT::t_int_1d gbincount;
|
||||
typename AT::t_int_2d gbins;
|
||||
typename AT::t_int_2d_const c_gbins;
|
||||
|
||||
typename AT::t_int_scalar d_resize;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
|
||||
typename AT::t_x_array_randomread x;
|
||||
|
||||
// Bounds of the local atoms in the bins array
|
||||
typename AT::t_int_scalar d_lbinxlo; // lowest local bin x-dim coordinate
|
||||
typename AT::t_int_scalar d_lbinylo; // lowest local bin y-dim coordinate
|
||||
typename AT::t_int_scalar d_lbinzlo; // lowest local bin z-dim coordinate
|
||||
typename AT::t_int_scalar d_lbinxhi; // highest local bin x-dim coordinate
|
||||
typename AT::t_int_scalar d_lbinyhi; // highest local bin y-dim coordinate
|
||||
typename AT::t_int_scalar d_lbinzhi; // highest local bin z-dim coordinate
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_lbinxlo;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_lbinylo;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_lbinzlo;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_lbinxhi;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_lbinyhi;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_lbinzhi;
|
||||
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void binAtomsItem(const int &i) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void binIDAtomsItem(const int &i, int &update) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void binIDGhostsItem(const int &i, int &update) const;
|
||||
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
void sortBin(
|
||||
typename AT::t_int_1d gbincount,
|
||||
typename AT::t_int_2d gbins,
|
||||
const int &ibin);
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
convert atom coords into the ssa active interaction region number
|
||||
------------------------------------------------------------------------- */
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int coord2ssaAIR(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
|
||||
{
|
||||
int ix, iy, iz;
|
||||
ix = iy = iz = 0;
|
||||
if (z < sublo_[2]) iz = -1;
|
||||
if (z >= subhi_[2]) iz = 1;
|
||||
if (y < sublo_[1]) iy = -1;
|
||||
if (y >= subhi_[1]) iy = 1;
|
||||
if (x < sublo_[0]) ix = -1;
|
||||
if (x >= subhi_[0]) ix = 1;
|
||||
if(iz < 0){
|
||||
return -1;
|
||||
} else if(iz == 0){
|
||||
if( iy<0 ) return -1; // bottom left/middle/right
|
||||
if( (iy==0) && (ix<0) ) return -1; // left atoms
|
||||
if( (iy==0) && (ix==0) ) return 0; // Locally owned atoms
|
||||
if( (iy==0) && (ix>0) ) return 2; // Right atoms
|
||||
if( (iy>0) && (ix==0) ) return 1; // Top-middle atoms
|
||||
if( (iy>0) && (ix!=0) ) return 3; // Top-right and top-left atoms
|
||||
} else { // iz > 0
|
||||
if((ix==0) && (iy==0)) return 4; // Back atoms
|
||||
if((ix==0) && (iy!=0)) return 5; // Top-back and bottom-back atoms
|
||||
if((ix!=0) && (iy==0)) return 6; // Left-back and right-back atoms
|
||||
if((ix!=0) && (iy!=0)) return 7; // Back corner atoms
|
||||
}
|
||||
return -2;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
|
||||
{
|
||||
int ix,iy,iz;
|
||||
|
||||
if (x >= bboxhi_[0])
|
||||
ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx;
|
||||
else if (x >= bboxlo_[0]) {
|
||||
ix = static_cast<int> ((x-bboxlo_[0])*bininvx);
|
||||
ix = MIN(ix,nbinx-1);
|
||||
} else
|
||||
ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
|
||||
|
||||
if (y >= bboxhi_[1])
|
||||
iy = static_cast<int> ((y-bboxhi_[1])*bininvy) + nbiny;
|
||||
else if (y >= bboxlo_[1]) {
|
||||
iy = static_cast<int> ((y-bboxlo_[1])*bininvy);
|
||||
iy = MIN(iy,nbiny-1);
|
||||
} else
|
||||
iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
|
||||
|
||||
if (z >= bboxhi_[2])
|
||||
iz = static_cast<int> ((z-bboxhi_[2])*bininvz) + nbinz;
|
||||
else if (z >= bboxlo_[2]) {
|
||||
iz = static_cast<int> ((z-bboxlo_[2])*bininvz);
|
||||
iz = MIN(iz,nbinz-1);
|
||||
} else
|
||||
iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
|
||||
|
||||
i[0] = ix - mbinxlo;
|
||||
i[1] = iy - mbinylo;
|
||||
i[2] = iz - mbinzlo;
|
||||
|
||||
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
|
||||
}
|
||||
|
||||
private:
|
||||
double bboxlo_[3],bboxhi_[3];
|
||||
double sublo_[3], subhi_[3];
|
||||
};
|
||||
|
||||
template<class DeviceType>
|
||||
struct NPairSSAKokkosBinAtomsFunctor {
|
||||
typedef DeviceType device_type;
|
||||
|
||||
const NBinSSAKokkos<DeviceType> c;
|
||||
|
||||
NPairSSAKokkosBinAtomsFunctor(const NBinSSAKokkos<DeviceType> &_c):
|
||||
c(_c) {};
|
||||
~NPairSSAKokkosBinAtomsFunctor() {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i) const {
|
||||
c.binAtomsItem(i);
|
||||
}
|
||||
};
|
||||
|
||||
template<class DeviceType>
|
||||
struct NPairSSAKokkosBinIDAtomsFunctor {
|
||||
typedef DeviceType device_type;
|
||||
typedef int value_type;
|
||||
|
||||
const NBinSSAKokkos<DeviceType> c;
|
||||
|
||||
NPairSSAKokkosBinIDAtomsFunctor(const NBinSSAKokkos<DeviceType> &_c):
|
||||
c(_c) {};
|
||||
~NPairSSAKokkosBinIDAtomsFunctor() {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i, value_type& update) const {
|
||||
c.binIDAtomsItem(i, update);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void join (volatile value_type& dst,
|
||||
const volatile value_type& src) const {
|
||||
if (dst < src) dst = src;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void init (value_type& dst) const {
|
||||
dst = INT_MIN;
|
||||
}
|
||||
};
|
||||
|
||||
template<class DeviceType>
|
||||
struct NPairSSAKokkosBinIDGhostsFunctor {
|
||||
typedef DeviceType device_type;
|
||||
typedef int value_type;
|
||||
|
||||
const NBinSSAKokkos<DeviceType> c;
|
||||
|
||||
NPairSSAKokkosBinIDGhostsFunctor(const NBinSSAKokkos<DeviceType> &_c):
|
||||
c(_c) {};
|
||||
~NPairSSAKokkosBinIDGhostsFunctor() {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i, value_type& update) const {
|
||||
c.binIDGhostsItem(i, update);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void join (volatile value_type& dst,
|
||||
const volatile value_type& src) const {
|
||||
if (dst < src) dst = src;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void init (value_type& dst) const {
|
||||
dst = INT_MIN;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
|
@ -274,7 +274,6 @@ void NeighBondKokkos<DeviceType>::bond_all()
|
|||
k_fail_flag.template sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondBondAll>(0,nlocal),*this,nmissing);
|
||||
DeviceType::fence();
|
||||
|
||||
k_nlist.template modify<DeviceType>();
|
||||
k_nlist.template sync<LMPHostType>();
|
||||
|
@ -370,7 +369,6 @@ void NeighBondKokkos<DeviceType>::bond_partial()
|
|||
k_fail_flag.template sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondBondPartial>(0,nlocal),*this,nmissing);
|
||||
DeviceType::fence();
|
||||
|
||||
k_nlist.template modify<DeviceType>();
|
||||
k_nlist.template sync<LMPHostType>();
|
||||
|
@ -443,7 +441,6 @@ void NeighBondKokkos<DeviceType>::bond_check()
|
|||
k_bondlist.sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondBondCheck>(0,neighbor->nbondlist),*this,flag);
|
||||
DeviceType::fence();
|
||||
|
||||
int flag_all;
|
||||
MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
|
||||
|
@ -494,7 +491,6 @@ void NeighBondKokkos<DeviceType>::angle_all()
|
|||
k_fail_flag.template sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondAngleAll>(0,nlocal),*this,nmissing);
|
||||
DeviceType::fence();
|
||||
|
||||
k_nlist.template modify<DeviceType>();
|
||||
k_nlist.template sync<LMPHostType>();
|
||||
|
@ -597,7 +593,6 @@ void NeighBondKokkos<DeviceType>::angle_partial()
|
|||
k_fail_flag.template sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondAnglePartial>(0,nlocal),*this,nmissing);
|
||||
DeviceType::fence();
|
||||
|
||||
k_nlist.template modify<DeviceType>();
|
||||
k_nlist.template sync<LMPHostType>();
|
||||
|
@ -678,7 +673,6 @@ void NeighBondKokkos<DeviceType>::angle_check()
|
|||
k_anglelist.sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondAngleCheck>(0,neighbor->nanglelist),*this,flag);
|
||||
DeviceType::fence();
|
||||
|
||||
int flag_all;
|
||||
MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
|
||||
|
@ -741,7 +735,6 @@ void NeighBondKokkos<DeviceType>::dihedral_all()
|
|||
k_fail_flag.template sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondDihedralAll>(0,nlocal),*this,nmissing);
|
||||
DeviceType::fence();
|
||||
|
||||
k_nlist.template modify<DeviceType>();
|
||||
k_nlist.template sync<LMPHostType>();
|
||||
|
@ -849,7 +842,6 @@ void NeighBondKokkos<DeviceType>::dihedral_partial()
|
|||
k_fail_flag.template sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondDihedralPartial>(0,nlocal),*this,nmissing);
|
||||
DeviceType::fence();
|
||||
|
||||
k_nlist.template modify<DeviceType>();
|
||||
k_nlist.template sync<LMPHostType>();
|
||||
|
@ -935,7 +927,6 @@ void NeighBondKokkos<DeviceType>::dihedral_check(int nlist, typename AT::t_int_2
|
|||
k_dihedrallist.sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondDihedralCheck>(0,nlist),*this,flag);
|
||||
DeviceType::fence();
|
||||
|
||||
int flag_all;
|
||||
MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
|
||||
|
@ -1015,7 +1006,6 @@ void NeighBondKokkos<DeviceType>::improper_all()
|
|||
k_fail_flag.template sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondImproperAll>(0,nlocal),*this,nmissing);
|
||||
DeviceType::fence();
|
||||
|
||||
k_nlist.template modify<DeviceType>();
|
||||
k_nlist.template sync<LMPHostType>();
|
||||
|
@ -1123,7 +1113,6 @@ void NeighBondKokkos<DeviceType>::improper_partial()
|
|||
k_fail_flag.template sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondImproperPartial>(0,nlocal),*this,nmissing);
|
||||
DeviceType::fence();
|
||||
|
||||
k_nlist.template modify<DeviceType>();
|
||||
k_nlist.template sync<LMPHostType>();
|
||||
|
|
|
@ -48,7 +48,7 @@ class AtomNeighborsConst
|
|||
const int num_neighs;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
AtomNeighborsConst(int* const & firstneigh, const int & _num_neighs,
|
||||
AtomNeighborsConst(const int* const & firstneigh, const int & _num_neighs,
|
||||
const int & stride):
|
||||
_firstneigh(firstneigh), num_neighs(_num_neighs), _stride(stride) {};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
|
@ -82,6 +82,14 @@ public:
|
|||
&d_neighbors(i,1)-&d_neighbors(i,0));
|
||||
}
|
||||
|
||||
// Build a read-only neighbor accessor for atom i from the given neighbor
// and neighbor-count views, without needing a neighbor-list object.
KOKKOS_INLINE_FUNCTION
static AtomNeighborsConst static_neighbors_const(int i,
       typename ArrayTypes<Device>::t_neighbors_2d_const const& d_neighbors,
       typename ArrayTypes<Device>::t_int_1d_const const& d_numneigh) {
  // address of atom i's first neighbor entry
  const auto first = &d_neighbors(i,0);
  // element distance between consecutive neighbor entries of atom i
  const auto stride = &d_neighbors(i,1) - first;
  return AtomNeighborsConst(first,d_numneigh(i),stride);
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
AtomNeighborsConst get_neighbors_const(const int &i) const {
|
||||
return AtomNeighborsConst(&d_neighbors(i,0),d_numneigh(i),
|
||||
|
|
|
@ -206,7 +206,6 @@ int NeighborKokkos::check_distance_kokkos()
|
|||
int flag = 0;
|
||||
copymode = 1;
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighborCheckDistance<DeviceType> >(0,nlocal),*this,flag);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
int flagall;
|
||||
|
@ -273,7 +272,6 @@ void NeighborKokkos::build_kokkos(int topoflag)
|
|||
}
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagNeighborXhold<DeviceType> >(0,nlocal),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
xhold.modify<DeviceType>();
|
||||
if (boxcheck) {
|
||||
|
|
|
@ -173,12 +173,6 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_)
|
|||
data.special_flag[2] = special_flag[2];
|
||||
data.special_flag[3] = special_flag[3];
|
||||
|
||||
if(list->d_neighbors.dimension_0()<nall) {
|
||||
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs);
|
||||
list->d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("numneigh", nall*1.1);
|
||||
data.neigh_list.d_neighbors = list->d_neighbors;
|
||||
data.neigh_list.d_numneigh = list->d_numneigh;
|
||||
}
|
||||
data.h_resize()=1;
|
||||
while(data.h_resize()) {
|
||||
data.h_new_maxneighs() = list->maxneighs;
|
||||
|
@ -220,7 +214,6 @@ if (GHOST) {
|
|||
#endif
|
||||
}
|
||||
}
|
||||
DeviceType::fence();
|
||||
deep_copy(data.h_resize, data.resize);
|
||||
|
||||
if(data.h_resize()) {
|
||||
|
@ -435,10 +428,10 @@ void NeighborKokkosExecute<DeviceType>::
|
|||
|
||||
neigh_list.d_numneigh(i) = n;
|
||||
|
||||
if(n >= neigh_list.maxneighs) {
|
||||
if(n > neigh_list.maxneighs) {
|
||||
resize() = 1;
|
||||
|
||||
if(n >= new_maxneighs()) new_maxneighs() = n;
|
||||
if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
|
||||
}
|
||||
|
||||
neigh_list.d_ilist(i) = i;
|
||||
|
@ -645,10 +638,10 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
|||
neigh_list.d_ilist(i) = i;
|
||||
}
|
||||
|
||||
if(n >= neigh_list.maxneighs) {
|
||||
if(n > neigh_list.maxneighs) {
|
||||
resize() = 1;
|
||||
|
||||
if(n >= new_maxneighs()) new_maxneighs() = n;
|
||||
if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -737,9 +730,9 @@ void NeighborKokkosExecute<DeviceType>::
|
|||
const int ybin = binxyz[1];
|
||||
const int zbin = binxyz[2];
|
||||
for (int k = 0; k < nstencil; k++) {
|
||||
const X_FLOAT xbin2 = xbin + stencilxyz(k,0);
|
||||
const X_FLOAT ybin2 = ybin + stencilxyz(k,1);
|
||||
const X_FLOAT zbin2 = zbin + stencilxyz(k,2);
|
||||
const int xbin2 = xbin + stencilxyz(k,0);
|
||||
const int ybin2 = ybin + stencilxyz(k,1);
|
||||
const int zbin2 = zbin + stencilxyz(k,2);
|
||||
if (xbin2 < 0 || xbin2 >= mbinx ||
|
||||
ybin2 < 0 || ybin2 >= mbiny ||
|
||||
zbin2 < 0 || zbin2 >= mbinz) continue;
|
||||
|
@ -768,10 +761,10 @@ void NeighborKokkosExecute<DeviceType>::
|
|||
|
||||
neigh_list.d_numneigh(i) = n;
|
||||
|
||||
if(n >= neigh_list.maxneighs) {
|
||||
if(n > neigh_list.maxneighs) {
|
||||
resize() = 1;
|
||||
|
||||
if(n >= new_maxneighs()) new_maxneighs() = n;
|
||||
if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
|
||||
}
|
||||
neigh_list.d_ilist(i) = i;
|
||||
}
|
||||
|
|
|
@ -281,9 +281,6 @@ class NeighborKokkosExecute
|
|||
void build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const;
|
||||
#endif
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void binatomsItem(const int &i) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
|
||||
{
|
||||
|
|
|
@ -0,0 +1,750 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors:
|
||||
James Larentzos and Timothy I. Mattox (Engility Corporation)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_ssa_kokkos.h"
|
||||
#include "neigh_list.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "domain_kokkos.h"
|
||||
#include "neighbor_kokkos.h"
|
||||
#include "nbin_ssa_kokkos.h"
|
||||
#include "nstencil_ssa.h"
|
||||
#include "error.h"
|
||||
#include "comm.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
NPairSSAKokkos<DeviceType>::NPairSSAKokkos(LAMMPS *lmp) : NPair(lmp), ssa_phaseCt(27), ssa_gphaseCt(7)
|
||||
{
|
||||
const int gphaseLenEstimate = 1; //FIXME make this 4 eventually
|
||||
k_ssa_gphaseLen = DAT::tdual_int_1d("NPairSSAKokkos:ssa_gphaseLen",ssa_gphaseCt);
|
||||
ssa_gphaseLen = k_ssa_gphaseLen.view<DeviceType>();
|
||||
|
||||
k_ssa_gitemLoc = DAT::tdual_int_2d("NPairSSAKokkos::ssa_gitemLoc",ssa_gphaseCt,gphaseLenEstimate);
|
||||
ssa_gitemLoc = k_ssa_gitemLoc.view<DeviceType>();
|
||||
k_ssa_gitemLen = DAT::tdual_int_2d("NPairSSAKokkos::ssa_gitemLen",ssa_gphaseCt,gphaseLenEstimate);
|
||||
ssa_gitemLen = k_ssa_gitemLen.view<DeviceType>();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
copy needed info from Neighbor class to this build class
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void NPairSSAKokkos<DeviceType>::copy_neighbor_info()
|
||||
{
|
||||
NPair::copy_neighbor_info();
|
||||
|
||||
NeighborKokkos* neighborKK = (NeighborKokkos*) neighbor;
|
||||
|
||||
// general params
|
||||
|
||||
k_cutneighsq = neighborKK->k_cutneighsq;
|
||||
|
||||
// exclusion info
|
||||
|
||||
k_ex1_type = neighborKK->k_ex1_type;
|
||||
k_ex2_type = neighborKK->k_ex2_type;
|
||||
k_ex_type = neighborKK->k_ex_type;
|
||||
k_ex1_group = neighborKK->k_ex1_group;
|
||||
k_ex2_group = neighborKK->k_ex2_group;
|
||||
k_ex1_bit = neighborKK->k_ex1_bit;
|
||||
k_ex2_bit = neighborKK->k_ex2_bit;
|
||||
k_ex_mol_group = neighborKK->k_ex_mol_group;
|
||||
k_ex_mol_bit = neighborKK->k_ex_mol_bit;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
copy per-atom and per-bin vectors from NBinSSAKokkos class to this build class
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void NPairSSAKokkos<DeviceType>::copy_bin_info()
|
||||
{
|
||||
NPair::copy_bin_info();
|
||||
|
||||
NBinSSAKokkos<DeviceType>* nbKK = dynamic_cast<NBinSSAKokkos<DeviceType>*>(nb);
|
||||
if (!nbKK) error->one(FLERR, "NBin wasn't a NBinSSAKokkos object");
|
||||
|
||||
atoms_per_bin = nbKK->atoms_per_bin;
|
||||
k_bincount = nbKK->k_bincount;
|
||||
k_bins = nbKK->k_bins;
|
||||
|
||||
ghosts_per_gbin = nbKK->ghosts_per_gbin;
|
||||
k_gbincount = nbKK->k_gbincount;
|
||||
k_gbins = nbKK->k_gbins;
|
||||
|
||||
lbinxlo = nbKK->h_lbinxlo();
|
||||
lbinxhi = nbKK->h_lbinxhi();
|
||||
lbinylo = nbKK->h_lbinylo();
|
||||
lbinyhi = nbKK->h_lbinyhi();
|
||||
lbinzlo = nbKK->h_lbinzlo();
|
||||
lbinzhi = nbKK->h_lbinzhi();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
copy needed info from NStencil class to this build class
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void NPairSSAKokkos<DeviceType>::copy_stencil_info()
|
||||
{
|
||||
NPair::copy_stencil_info();
|
||||
|
||||
nstencil = ns->nstencil;
|
||||
|
||||
int maxstencil = ns->get_maxstencil();
|
||||
|
||||
k_stencil = DAT::tdual_int_1d("NPairSSAKokkos:stencil",maxstencil);
|
||||
for (int k = 0; k < maxstencil; k++) {
|
||||
k_stencil.h_view(k) = ns->stencil[k];
|
||||
}
|
||||
k_stencil.modify<LMPHostType>();
|
||||
k_stencil.sync<DeviceType>();
|
||||
k_stencilxyz = DAT::tdual_int_1d_3("NPairSSAKokkos:stencilxyz",maxstencil);
|
||||
for (int k = 0; k < maxstencil; k++) {
|
||||
k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0];
|
||||
k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1];
|
||||
k_stencilxyz.h_view(k,2) = ns->stencilxyz[k][2];
|
||||
}
|
||||
k_stencilxyz.modify<LMPHostType>();
|
||||
k_stencilxyz.sync<DeviceType>();
|
||||
|
||||
NStencilSSA *ns_ssa = dynamic_cast<NStencilSSA*>(ns);
|
||||
if (!ns_ssa) error->one(FLERR, "NStencil wasn't a NStencilSSA object");
|
||||
|
||||
k_nstencil_ssa = DAT::tdual_int_1d("NPairSSAKokkos:nstencil_ssa",5);
|
||||
for (int k = 0; k < 5; ++k) {
|
||||
k_nstencil_ssa.h_view(k) = ns_ssa->nstencil_ssa[k];
|
||||
}
|
||||
k_nstencil_ssa.modify<LMPHostType>();
|
||||
k_nstencil_ssa.sync<DeviceType>();
|
||||
sx1 = ns_ssa->sx + 1;
|
||||
sy1 = ns_ssa->sy + 1;
|
||||
sz1 = ns_ssa->sz + 1;
|
||||
|
||||
// Setup the phases of the workplan for locals
|
||||
ssa_phaseCt = sz1*sy1*sx1;
|
||||
if (ssa_phaseCt > (int) k_ssa_phaseLen.dimension_0()) {
|
||||
k_ssa_phaseLen = DAT::tdual_int_1d("NPairSSAKokkos:ssa_phaseLen",ssa_phaseCt);
|
||||
ssa_phaseLen = k_ssa_phaseLen.view<DeviceType>();
|
||||
k_ssa_phaseOff = DAT::tdual_int_1d_3("NPairSSAKokkos:ssa_phaseOff",ssa_phaseCt);
|
||||
ssa_phaseOff = k_ssa_phaseOff.view<DeviceType>();
|
||||
}
|
||||
auto h_ssa_phaseOff = k_ssa_phaseOff.h_view;
|
||||
k_ssa_phaseOff.sync<LMPHostType>();
|
||||
int workPhase = 0;
|
||||
for (int zoff = sz1 - 1; zoff >= 0; --zoff) {
|
||||
for (int yoff = sy1 - 1; yoff >= 0; --yoff) {
|
||||
for (int xoff = sx1 - 1; xoff >= 0; --xoff) {
|
||||
h_ssa_phaseOff(workPhase, 0) = xoff;
|
||||
h_ssa_phaseOff(workPhase, 1) = yoff;
|
||||
h_ssa_phaseOff(workPhase, 2) = zoff;
|
||||
workPhase++;
|
||||
}
|
||||
}
|
||||
}
|
||||
k_ssa_phaseOff.modify<LMPHostType>();
|
||||
k_ssa_phaseOff.sync<DeviceType>();
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int NPairSSAKokkosExecute<DeviceType>::find_special(const int &i, const int &j) const
|
||||
{
|
||||
const int n1 = nspecial(i,0);
|
||||
const int n2 = nspecial(i,1);
|
||||
const int n3 = nspecial(i,2);
|
||||
|
||||
for (int k = 0; k < n3; k++) {
|
||||
if (special(i,k) == tag(j)) {
|
||||
if (k < n1) {
|
||||
if (special_flag[1] == 0) return -1;
|
||||
else if (special_flag[1] == 1) return 0;
|
||||
else return 1;
|
||||
} else if (k < n2) {
|
||||
if (special_flag[2] == 0) return -1;
|
||||
else if (special_flag[2] == 1) return 0;
|
||||
else return 2;
|
||||
} else {
|
||||
if (special_flag[3] == 0) return -1;
|
||||
else if (special_flag[3] == 1) return 0;
|
||||
else return 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int NPairSSAKokkosExecute<DeviceType>::exclusion(const int &i,const int &j,
|
||||
const int &itype,const int &jtype) const
|
||||
{
|
||||
int m;
|
||||
|
||||
if (nex_type && ex_type(itype,jtype)) return 1;
|
||||
|
||||
if (nex_group) {
|
||||
for (m = 0; m < nex_group; m++) {
|
||||
if (mask(i) & ex1_bit(m) && mask(j) & ex2_bit(m)) return 1;
|
||||
if (mask(i) & ex2_bit(m) && mask(j) & ex1_bit(m)) return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (nex_mol) {
|
||||
for (m = 0; m < nex_mol; m++)
|
||||
if (mask(i) & ex_mol_bit(m) && mask(j) & ex_mol_bit(m) &&
|
||||
molecule(i) == molecule(j)) return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction with full Newton's 3rd law
|
||||
for use by Shardlow Splitting Algorithm
|
||||
each owned atom i checks its own bin and other bins in Newton stencil
|
||||
every pair stored exactly once by some processor
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void NPairSSAKokkos<DeviceType>::build(NeighList *list_)
|
||||
{
|
||||
NeighListKokkos<DeviceType>* list = (NeighListKokkos<DeviceType>*) list_;
|
||||
const int nlocal = includegroup?atom->nfirst:atom->nlocal;
|
||||
int nl_size;
|
||||
|
||||
int xbinCt = (lbinxhi - lbinxlo + sx1 - 1) / sx1 + 1;
|
||||
int ybinCt = (lbinyhi - lbinylo + sy1 - 1) / sy1 + 1;
|
||||
int zbinCt = (lbinzhi - lbinzlo + sz1 - 1) / sz1 + 1;
|
||||
int phaseLenEstimate = xbinCt*ybinCt*zbinCt;
|
||||
|
||||
if ((ssa_phaseCt > (int) k_ssa_itemLoc.dimension_0()) ||
|
||||
(phaseLenEstimate > (int) k_ssa_itemLoc.dimension_1())) {
|
||||
k_ssa_itemLoc = DAT::tdual_int_2d("NPairSSAKokkos::ssa_itemLoc",ssa_phaseCt,phaseLenEstimate);
|
||||
ssa_itemLoc = k_ssa_itemLoc.view<DeviceType>();
|
||||
k_ssa_itemLen = DAT::tdual_int_2d("NPairSSAKokkos::ssa_itemLen",ssa_phaseCt,phaseLenEstimate);
|
||||
ssa_itemLen = k_ssa_itemLen.view<DeviceType>();
|
||||
}
|
||||
|
||||
k_ssa_itemLoc.sync<LMPHostType>();
|
||||
k_ssa_itemLen.sync<LMPHostType>();
|
||||
k_ssa_gitemLoc.sync<LMPHostType>();
|
||||
k_ssa_gitemLen.sync<LMPHostType>();
|
||||
k_ssa_phaseOff.sync<LMPHostType>();
|
||||
k_ssa_phaseLen.sync<LMPHostType>();
|
||||
auto h_ssa_itemLoc = k_ssa_itemLoc.h_view;
|
||||
auto h_ssa_itemLen = k_ssa_itemLen.h_view;
|
||||
auto h_ssa_gitemLoc = k_ssa_gitemLoc.h_view;
|
||||
auto h_ssa_gitemLen = k_ssa_gitemLen.h_view;
|
||||
auto h_ssa_phaseOff = k_ssa_phaseOff.h_view;
|
||||
auto h_ssa_phaseLen = k_ssa_phaseLen.h_view;
|
||||
|
||||
{ // Preflight the neighbor list workplan
|
||||
k_bincount.sync<LMPHostType>();
|
||||
auto h_bincount = k_bincount.h_view;
|
||||
k_stencil.sync<LMPHostType>();
|
||||
auto h_stencil = k_stencil.h_view;
|
||||
k_nstencil_ssa.sync<LMPHostType>();
|
||||
auto h_nstencil_ssa = k_nstencil_ssa.h_view;
|
||||
int inum = 0;
|
||||
|
||||
// loop over bins with local atoms, counting half of the neighbors
|
||||
for (int workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) {
|
||||
int zoff = h_ssa_phaseOff(workPhase, 2);
|
||||
int yoff = h_ssa_phaseOff(workPhase, 1);
|
||||
int xoff = h_ssa_phaseOff(workPhase, 0);
|
||||
int workItem = 0;
|
||||
for (int zbin = lbinzlo + zoff; zbin < lbinzhi; zbin += sz1) {
|
||||
for (int ybin = lbinylo + yoff - sy1 + 1; ybin < lbinyhi; ybin += sy1) {
|
||||
for (int xbin = lbinxlo + xoff - sx1 + 1; xbin < lbinxhi; xbin += sx1) {
|
||||
int inum_start = inum;
|
||||
// if (workItem >= phaseLenEstimate) error->one(FLERR,"phaseLenEstimate was too small");
|
||||
|
||||
for (int subphase = 0; subphase < 4; subphase++) {
|
||||
int s_ybin = ybin + ((subphase & 0x2) ? sy1 - 1 : 0);
|
||||
int s_xbin = xbin + ((subphase & 0x1) ? sx1 - 1 : 0);
|
||||
if ((s_ybin < lbinylo) || (s_ybin >= lbinyhi)) continue;
|
||||
if ((s_xbin < lbinxlo) || (s_xbin >= lbinxhi)) continue;
|
||||
|
||||
const int ibin = zbin*mbiny*mbinx + s_ybin*mbinx + s_xbin;
|
||||
const int ibinCt = h_bincount(ibin);
|
||||
if (ibinCt > 0) {
|
||||
int base_n = 0;
|
||||
bool include_same = false;
|
||||
// count all local atoms in the current stencil "subphase" as potential neighbors
|
||||
for (int k = h_nstencil_ssa(subphase); k < h_nstencil_ssa(subphase+1); k++) {
|
||||
const int jbin = ibin+h_stencil(k);
|
||||
if (jbin != ibin) base_n += h_bincount(jbin);
|
||||
else include_same = true;
|
||||
}
|
||||
// Calculate how many ibin particles would have had some neighbors
|
||||
if (base_n > 0) inum += ibinCt;
|
||||
else if (include_same) inum += ibinCt - 1;
|
||||
}
|
||||
}
|
||||
h_ssa_itemLoc(workPhase,workItem) = inum_start; // record where workItem starts in ilist
|
||||
h_ssa_itemLen(workPhase,workItem) = inum - inum_start; // record workItem length
|
||||
#ifdef DEBUG_SSA_BUILD_LOCALS
|
||||
if (h_ssa_itemLen(workPhase,workItem) < 0) fprintf(stdout, "undr%03d phase (%3d,%3d) inum %d - inum_start %d UNDERFLOW\n"
|
||||
,comm->me
|
||||
,workPhase
|
||||
,workItem
|
||||
,inum
|
||||
,inum_start
|
||||
);
|
||||
#endif
|
||||
workItem++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG_SSA_BUILD_LOCALS
|
||||
fprintf(stdout, "phas%03d phase %3d could use %6d inums, expected %6d inums. maxworkItems = %3d, inums/workItems = %g\n"
|
||||
,comm->me
|
||||
,workPhase
|
||||
,inum - h_ssa_itemLoc(workPhase, 0)
|
||||
,(nlocal*4 + ssa_phaseCt - 1) / ssa_phaseCt
|
||||
,workItem
|
||||
,(inum - h_ssa_itemLoc(workPhase, 0)) / (double) workItem
|
||||
);
|
||||
#endif
|
||||
// record where workPhase ends
|
||||
h_ssa_phaseLen(workPhase) = workItem;
|
||||
}
|
||||
#ifdef DEBUG_SSA_BUILD_LOCALS
|
||||
fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inums/phase = %g\n"
|
||||
,comm->me
|
||||
,workPhase
|
||||
,inum
|
||||
,nlocal*4
|
||||
,inum / (double) workPhase
|
||||
);
|
||||
#endif
|
||||
nl_size = inum; // record how much space is needed for the local work plan
|
||||
}
|
||||
|
||||
// count how many ghosts might have neighbors, and increase the work plan storage
|
||||
k_gbincount.sync<LMPHostType>();
|
||||
for (int workPhase = 0; workPhase < ssa_gphaseCt; workPhase++) {
|
||||
int len = k_gbincount.h_view(workPhase + 1);
|
||||
h_ssa_gitemLoc(workPhase,0) = nl_size; // record where workItem starts in ilist
|
||||
h_ssa_gitemLen(workPhase,0) = len;
|
||||
nl_size += len;
|
||||
}
|
||||
list->grow(nl_size); // Make special larger SSA neighbor list
|
||||
|
||||
k_ssa_itemLoc.modify<LMPHostType>();
|
||||
k_ssa_itemLen.modify<LMPHostType>();
|
||||
k_ssa_gitemLoc.modify<LMPHostType>();
|
||||
k_ssa_gitemLen.modify<LMPHostType>();
|
||||
k_ssa_phaseLen.modify<LMPHostType>();
|
||||
k_ssa_itemLoc.sync<DeviceType>();
|
||||
k_ssa_itemLen.sync<DeviceType>();
|
||||
k_ssa_gitemLen.sync<DeviceType>();
|
||||
k_ssa_gitemLoc.sync<DeviceType>();
|
||||
k_ssa_phaseOff.sync<DeviceType>();
|
||||
k_ssa_phaseLen.sync<DeviceType>();
|
||||
k_ssa_gphaseLen.sync<DeviceType>();
|
||||
|
||||
NPairSSAKokkosExecute<DeviceType>
|
||||
data(*list,
|
||||
k_cutneighsq.view<DeviceType>(),
|
||||
k_bincount.view<DeviceType>(),
|
||||
k_bins.view<DeviceType>(),
|
||||
k_gbincount.view<DeviceType>(),
|
||||
k_gbins.view<DeviceType>(),
|
||||
lbinxlo, lbinxhi, lbinylo, lbinyhi, lbinzlo, lbinzhi,
|
||||
nstencil, sx1, sy1, sz1,
|
||||
k_stencil.view<DeviceType>(),
|
||||
k_stencilxyz.view<DeviceType>(),
|
||||
k_nstencil_ssa.view<DeviceType>(),
|
||||
ssa_phaseCt,
|
||||
k_ssa_phaseLen.view<DeviceType>(),
|
||||
k_ssa_phaseOff.view<DeviceType>(),
|
||||
k_ssa_itemLoc.view<DeviceType>(),
|
||||
k_ssa_itemLen.view<DeviceType>(),
|
||||
ssa_gphaseCt,
|
||||
k_ssa_gphaseLen.view<DeviceType>(),
|
||||
k_ssa_gitemLoc.view<DeviceType>(),
|
||||
k_ssa_gitemLen.view<DeviceType>(),
|
||||
nlocal,
|
||||
atomKK->k_x.view<DeviceType>(),
|
||||
atomKK->k_type.view<DeviceType>(),
|
||||
atomKK->k_mask.view<DeviceType>(),
|
||||
atomKK->k_molecule.view<DeviceType>(),
|
||||
atomKK->k_tag.view<DeviceType>(),
|
||||
atomKK->k_special.view<DeviceType>(),
|
||||
atomKK->k_nspecial.view<DeviceType>(),
|
||||
atomKK->molecular,
|
||||
nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
|
||||
bininvx,bininvy,bininvz,
|
||||
exclude, nex_type,
|
||||
k_ex1_type.view<DeviceType>(),
|
||||
k_ex2_type.view<DeviceType>(),
|
||||
k_ex_type.view<DeviceType>(),
|
||||
nex_group,
|
||||
k_ex1_group.view<DeviceType>(),
|
||||
k_ex2_group.view<DeviceType>(),
|
||||
k_ex1_bit.view<DeviceType>(),
|
||||
k_ex2_bit.view<DeviceType>(),
|
||||
nex_mol,
|
||||
k_ex_mol_group.view<DeviceType>(),
|
||||
k_ex_mol_bit.view<DeviceType>(),
|
||||
bboxhi,bboxlo,
|
||||
domain->xperiodic,domain->yperiodic,domain->zperiodic,
|
||||
domain->xprd_half,domain->yprd_half,domain->zprd_half);
|
||||
|
||||
k_cutneighsq.sync<DeviceType>();
|
||||
k_ex1_type.sync<DeviceType>();
|
||||
k_ex2_type.sync<DeviceType>();
|
||||
k_ex_type.sync<DeviceType>();
|
||||
k_ex1_group.sync<DeviceType>();
|
||||
k_ex2_group.sync<DeviceType>();
|
||||
k_ex1_bit.sync<DeviceType>();
|
||||
k_ex2_bit.sync<DeviceType>();
|
||||
k_ex_mol_group.sync<DeviceType>();
|
||||
k_ex_mol_bit.sync<DeviceType>();
|
||||
k_bincount.sync<DeviceType>();
|
||||
k_bins.sync<DeviceType>();
|
||||
k_gbincount.sync<DeviceType>();
|
||||
k_gbins.sync<DeviceType>();
|
||||
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
|
||||
|
||||
data.special_flag[0] = special_flag[0];
|
||||
data.special_flag[1] = special_flag[1];
|
||||
data.special_flag[2] = special_flag[2];
|
||||
data.special_flag[3] = special_flag[3];
|
||||
|
||||
bool firstTry = true;
|
||||
data.h_resize()=1;
|
||||
while(data.h_resize()) {
|
||||
data.h_new_maxneighs() = list->maxneighs;
|
||||
data.h_resize() = 0;
|
||||
|
||||
Kokkos::deep_copy(data.resize, data.h_resize);
|
||||
Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);
|
||||
|
||||
// loop over bins with local atoms, storing half of the neighbors
|
||||
Kokkos::parallel_for(ssa_phaseCt, LAMMPS_LAMBDA (const int workPhase) {
|
||||
data.build_locals_onePhase(firstTry, comm->me, workPhase);
|
||||
});
|
||||
k_ssa_itemLoc.modify<DeviceType>();
|
||||
k_ssa_itemLen.modify<DeviceType>();
|
||||
k_ssa_phaseLen.modify<DeviceType>();
|
||||
k_ssa_itemLoc.sync<LMPHostType>();
|
||||
k_ssa_itemLen.sync<LMPHostType>();
|
||||
k_ssa_phaseLen.sync<LMPHostType>();
|
||||
data.neigh_list.inum = h_ssa_itemLoc(ssa_phaseCt-1,h_ssa_phaseLen(ssa_phaseCt-1)-1) +
|
||||
h_ssa_itemLen(ssa_phaseCt-1,h_ssa_phaseLen(ssa_phaseCt-1)-1);
|
||||
|
||||
// loop over AIR ghost atoms, storing their local neighbors
|
||||
Kokkos::parallel_for(ssa_gphaseCt, LAMMPS_LAMBDA (const int workPhase) {
|
||||
data.build_ghosts_onePhase(workPhase);
|
||||
});
|
||||
k_ssa_gitemLoc.modify<DeviceType>();
|
||||
k_ssa_gitemLen.modify<DeviceType>();
|
||||
k_ssa_gphaseLen.modify<DeviceType>();
|
||||
k_ssa_gitemLoc.sync<LMPHostType>();
|
||||
k_ssa_gitemLen.sync<LMPHostType>();
|
||||
k_ssa_gphaseLen.sync<LMPHostType>();
|
||||
auto h_ssa_gphaseLen = k_ssa_gphaseLen.h_view;
|
||||
data.neigh_list.gnum = h_ssa_gitemLoc(ssa_gphaseCt-1,h_ssa_gphaseLen(ssa_gphaseCt-1)-1) +
|
||||
h_ssa_gitemLen(ssa_gphaseCt-1,h_ssa_gphaseLen(ssa_gphaseCt-1)-1) - data.neigh_list.inum;
|
||||
firstTry = false;
|
||||
|
||||
deep_copy(data.h_resize, data.resize);
|
||||
|
||||
if(data.h_resize()) {
|
||||
deep_copy(data.h_new_maxneighs, data.new_maxneighs);
|
||||
list->maxneighs = data.h_new_maxneighs() * 1.2;
|
||||
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", list->d_neighbors.dimension_0(), list->maxneighs);
|
||||
data.neigh_list.d_neighbors = list->d_neighbors;
|
||||
data.neigh_list.maxneighs = list->maxneighs;
|
||||
}
|
||||
}
|
||||
|
||||
//k_ssa_phaseLen.modify<DeviceType>();
|
||||
//k_ssa_itemLoc.modify<DeviceType>();
|
||||
//k_ssa_itemLen.modify<DeviceType>();
|
||||
//k_ssa_gphaseLen.modify<DeviceType>();
|
||||
//k_ssa_gitemLoc.modify<DeviceType>();
|
||||
//k_ssa_gitemLen.modify<DeviceType>();
|
||||
|
||||
list->inum = data.neigh_list.inum; //FIXME once the above is in a parallel_for
|
||||
list->gnum = data.neigh_list.gnum; // it will need a deep_copy or something
|
||||
|
||||
#ifdef DEBUG_SSA_BUILD_LOCALS
|
||||
fprintf(stdout, "Fina%03d %6d inum %6d gnum, total used %6d, allocated %6d\n"
|
||||
,comm->me
|
||||
,list->inum
|
||||
,list->gnum
|
||||
,list->inum + list->gnum
|
||||
,nl_size
|
||||
);
|
||||
#endif
|
||||
|
||||
list->k_ilist.template modify<DeviceType>();
|
||||
}
|
||||
|
||||
|
||||
template<class DeviceType>
|
||||
void NPairSSAKokkosExecute<DeviceType>::build_locals_onePhase(const bool firstTry, int me, int workPhase) const
|
||||
{
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil = d_stencil;
|
||||
int which = 0;
|
||||
|
||||
int zoff = d_ssa_phaseOff(workPhase, 2);
|
||||
int yoff = d_ssa_phaseOff(workPhase, 1);
|
||||
int xoff = d_ssa_phaseOff(workPhase, 0);
|
||||
int workItem = 0;
|
||||
int skippedItems = 0;
|
||||
for (int zbin = lbinzlo + zoff; zbin < lbinzhi; zbin += sz1) {
|
||||
for (int ybin = lbinylo + yoff - sy1 + 1; ybin < lbinyhi; ybin += sy1) {
|
||||
for (int xbin = lbinxlo + xoff - sx1 + 1; xbin < lbinxhi; xbin += sx1) {
|
||||
if (d_ssa_itemLen(workPhase, workItem + skippedItems) == 0) {
|
||||
if (firstTry) ++skippedItems;
|
||||
else ++workItem; // phase is done,should break out of three loops here if we could...
|
||||
continue;
|
||||
}
|
||||
int inum_start = d_ssa_itemLoc(workPhase, workItem + skippedItems);
|
||||
int inum = inum_start;
|
||||
|
||||
for (int subphase = 0; subphase < 4; subphase++) {
|
||||
int s_ybin = ybin + ((subphase & 0x2) ? sy1 - 1 : 0);
|
||||
int s_xbin = xbin + ((subphase & 0x1) ? sx1 - 1 : 0);
|
||||
if ((s_ybin < lbinylo) || (s_ybin >= lbinyhi)) continue;
|
||||
if ((s_xbin < lbinxlo) || (s_xbin >= lbinxhi)) continue;
|
||||
|
||||
int ibin = zbin*mbiny*mbinx + s_ybin*mbinx + s_xbin;
|
||||
for (int il = 0; il < c_bincount(ibin); ++il) {
|
||||
const int i = c_bins(ibin, il);
|
||||
int n = 0;
|
||||
|
||||
const AtomNeighbors neighbors_i = neigh_list.get_neighbors(inum);
|
||||
const X_FLOAT xtmp = x(i, 0);
|
||||
const X_FLOAT ytmp = x(i, 1);
|
||||
const X_FLOAT ztmp = x(i, 2);
|
||||
const int itype = type(i);
|
||||
|
||||
// loop over all local atoms in the current stencil "subphase"
|
||||
for (int k = d_nstencil_ssa(subphase); k < d_nstencil_ssa(subphase+1); k++) {
|
||||
const int jbin = ibin+stencil(k);
|
||||
int jl;
|
||||
if (jbin != ibin) jl = 0;
|
||||
else jl = il + 1; // same bin as i, so start just past i in the bin
|
||||
for (; jl < c_bincount(jbin); ++jl) {
|
||||
const int j = c_bins(jbin, jl);
|
||||
const int jtype = type(j);
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j, 0);
|
||||
const X_FLOAT dely = ytmp - x(j, 1);
|
||||
const X_FLOAT delz = ztmp - x(j, 2);
|
||||
const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
if(rsq <= cutneighsq(itype,jtype)) {
|
||||
if (molecular) {
|
||||
if (!moltemplate)
|
||||
which = find_special(i,j);
|
||||
/* else if (imol >= 0) */
|
||||
/* which = find_special(onemols[imol]->special[iatom], */
|
||||
/* onemols[imol]->nspecial[iatom], */
|
||||
/* tag[j]-tagprev); */
|
||||
/* else which = 0; */
|
||||
if (which == 0){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
}else if (minimum_image_check(delx,dely,delz)){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
}
|
||||
else if (which > 0) {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
|
||||
else n++;
|
||||
}
|
||||
} else {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (n > 0) {
|
||||
neigh_list.d_numneigh(inum) = n;
|
||||
neigh_list.d_ilist(inum++) = i;
|
||||
if(n > neigh_list.maxneighs) {
|
||||
resize() = 1;
|
||||
if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
int len = inum - inum_start;
|
||||
#ifdef DEBUG_SSA_BUILD_LOCALS
|
||||
if (len != d_ssa_itemLen(workPhase, workItem + skippedItems)) {
|
||||
fprintf(stdout, "Leng%03d workphase (%2d,%3d,%3d): len = %4d, but ssa_itemLen = %4d%s\n"
|
||||
,me
|
||||
,workPhase
|
||||
,workItem
|
||||
,workItem + skippedItems
|
||||
,len
|
||||
,d_ssa_itemLen(workPhase, workItem + skippedItems)
|
||||
,(len > d_ssa_itemLen(workPhase, workItem + skippedItems)) ? " OVERFLOW" : ""
|
||||
);
|
||||
}
|
||||
#endif
|
||||
if (inum > inum_start) {
|
||||
d_ssa_itemLoc(workPhase,workItem) = inum_start; // record where workItem starts in ilist
|
||||
d_ssa_itemLen(workPhase,workItem) = inum - inum_start; // record actual workItem length
|
||||
workItem++;
|
||||
} else if (firstTry) ++skippedItems;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG_SSA_BUILD_LOCALS
|
||||
fprintf(stdout, "Phas%03d phase %3d used %6d inums, workItems = %3d, skipped = %3d, inums/workItems = %g\n"
|
||||
,me
|
||||
,workPhase
|
||||
,inum - d_ssa_itemLoc(workPhase, 0)
|
||||
,workItem
|
||||
,skippedItems
|
||||
,(inum - d_ssa_itemLoc(workPhase, 0)) / (double) workItem
|
||||
);
|
||||
#endif
|
||||
// record where workPhase actually ends
|
||||
if (firstTry) {
|
||||
d_ssa_phaseLen(workPhase) = workItem;
|
||||
while (workItem < (int) d_ssa_itemLen.dimension_1()) {
|
||||
d_ssa_itemLen(workPhase,workItem++) = 0;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
template<class DeviceType>
|
||||
void NPairSSAKokkosExecute<DeviceType>::build_ghosts_onePhase(int workPhase) const
|
||||
{
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil = d_stencil;
|
||||
int which = 0;
|
||||
|
||||
// since these are ghosts, must check if stencil bin is out of bounds
|
||||
int airnum = workPhase + 1;
|
||||
//FIXME for now, there is only 1 workItem for each ghost AIR
|
||||
int workItem;
|
||||
for (workItem = 0; workItem < 1; ++workItem) {
|
||||
int gNdx = d_ssa_gitemLoc(workPhase, workItem); // record where workItem starts in ilist
|
||||
for (int il = 0; il < c_gbincount(airnum); ++il) {
|
||||
const int i = c_gbins(airnum, il);
|
||||
int n = 0;
|
||||
|
||||
const AtomNeighbors neighbors_i = neigh_list.get_neighbors(gNdx);
|
||||
const X_FLOAT xtmp = x(i, 0);
|
||||
const X_FLOAT ytmp = x(i, 1);
|
||||
const X_FLOAT ztmp = x(i, 2);
|
||||
const int itype = type(i);
|
||||
|
||||
int loc[3];
|
||||
const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2), &(loc[0]));
|
||||
|
||||
// loop over AIR ghost atoms in all bins in "full" stencil
|
||||
// Note: the non-AIR ghost atoms have already been filtered out
|
||||
for (int k = 0; k < nstencil; k++) {
|
||||
int xbin2 = loc[0] + d_stencilxyz(k,0);
|
||||
int ybin2 = loc[1] + d_stencilxyz(k,1);
|
||||
int zbin2 = loc[2] + d_stencilxyz(k,2);
|
||||
// Skip it if this bin is outside the extent of local bins
|
||||
if (xbin2 < lbinxlo || xbin2 >= lbinxhi ||
|
||||
ybin2 < lbinylo || ybin2 >= lbinyhi ||
|
||||
zbin2 < lbinzlo || zbin2 >= lbinzhi) continue;
|
||||
const int jbin = ibin+stencil(k);
|
||||
for (int jl = 0; jl < c_bincount(jbin); ++jl) {
|
||||
const int j = c_bins(jbin, jl);
|
||||
const int jtype = type(j);
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j, 0);
|
||||
const X_FLOAT dely = ytmp - x(j, 1);
|
||||
const X_FLOAT delz = ztmp - x(j, 2);
|
||||
const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
if(rsq <= cutneighsq(itype,jtype)) {
|
||||
if (molecular) {
|
||||
if (!moltemplate)
|
||||
which = find_special(j,i);
|
||||
/* else if (jmol >= 0) */
|
||||
/* which = find_special(onemols[jmol]->special[jatom], */
|
||||
/* onemols[jmol]->nspecial[jatom], */
|
||||
/* tag[i]-jtagprev); */
|
||||
/* else which = 0; */
|
||||
if (which == 0){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
}else if (minimum_image_check(delx,dely,delz)){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
}
|
||||
else if (which > 0) {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
|
||||
else n++;
|
||||
}
|
||||
} else {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (n > 0) {
|
||||
neigh_list.d_numneigh(gNdx) = n;
|
||||
neigh_list.d_ilist(gNdx++) = i;
|
||||
if(n > neigh_list.maxneighs) {
|
||||
resize() = 1;
|
||||
if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
|
||||
}
|
||||
}
|
||||
}
|
||||
// record where workItem ends in ilist
|
||||
d_ssa_gitemLen(workPhase,workItem) = gNdx - d_ssa_gitemLoc(workPhase,workItem);
|
||||
// if (d_ssa_gitemLen(workPhase,workItem) > 0) workItem++;
|
||||
}
|
||||
d_ssa_gphaseLen(workPhase) = workItem;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Explicit instantiations of NPairSSAKokkos: always for LMPDeviceType, and
// additionally for LMPHostType when KOKKOS_HAVE_CUDA is defined.
namespace LAMMPS_NS {
|
||||
template class NPairSSAKokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class NPairSSAKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
|
@ -0,0 +1,362 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
|
||||
// Style-table entries seen when this header is included with NPAIR_CLASS
// defined. Each NPairStyle() invocation takes a style name, the class alias
// declared just above it, and an OR of NP_* flags; the host and device
// entries differ only in the last flag (NP_KOKKOS_HOST vs NP_KOKKOS_DEVICE).
typedef NPairSSAKokkos<LMPHostType> NPairSSAKokkosHost;
|
||||
NPairStyle(half/bin/newton/ssa/kk/host,
|
||||
NPairSSAKokkosHost,
|
||||
NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_SSA | NP_GHOST | NP_KOKKOS_HOST)
|
||||
|
||||
typedef NPairSSAKokkos<LMPDeviceType> NPairSSAKokkosDevice;
|
||||
NPairStyle(half/bin/newton/ssa/kk/device,
|
||||
NPairSSAKokkosDevice,
|
||||
NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_SSA | NP_GHOST | NP_KOKKOS_DEVICE)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_SSA_KOKKOS_H
|
||||
#define LMP_NPAIR_SSA_KOKKOS_H
|
||||
|
||||
#include "npair.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class DeviceType>
|
||||
class NPairSSAKokkos : public NPair {
|
||||
public:
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
// SSA Work plan data structures
|
||||
int ssa_phaseCt;
|
||||
DAT::tdual_int_1d k_ssa_phaseLen;
|
||||
DAT::tdual_int_1d_3 k_ssa_phaseOff;
|
||||
DAT::tdual_int_2d k_ssa_itemLoc;
|
||||
DAT::tdual_int_2d k_ssa_itemLen;
|
||||
typename AT::t_int_1d ssa_phaseLen;
|
||||
typename AT::t_int_1d_3 ssa_phaseOff;
|
||||
typename AT::t_int_2d ssa_itemLoc;
|
||||
typename AT::t_int_2d ssa_itemLen;
|
||||
|
||||
const int ssa_gphaseCt;
|
||||
DAT::tdual_int_1d k_ssa_gphaseLen;
|
||||
DAT::tdual_int_2d k_ssa_gitemLoc;
|
||||
DAT::tdual_int_2d k_ssa_gitemLen;
|
||||
typename AT::t_int_1d ssa_gphaseLen;
|
||||
typename AT::t_int_2d ssa_gitemLoc;
|
||||
typename AT::t_int_2d ssa_gitemLen;
|
||||
|
||||
NPairSSAKokkos(class LAMMPS *);
|
||||
~NPairSSAKokkos() {}
|
||||
void copy_neighbor_info();
|
||||
void copy_bin_info();
|
||||
void copy_stencil_info();
|
||||
void build(class NeighList *);
|
||||
private:
|
||||
// data from Neighbor class
|
||||
|
||||
DAT::tdual_xfloat_2d k_cutneighsq;
|
||||
|
||||
// exclusion data from Neighbor class
|
||||
|
||||
DAT::tdual_int_1d k_ex1_type,k_ex2_type;
|
||||
DAT::tdual_int_2d k_ex_type;
|
||||
DAT::tdual_int_1d k_ex1_group,k_ex2_group;
|
||||
DAT::tdual_int_1d k_ex1_bit,k_ex2_bit;
|
||||
DAT::tdual_int_1d k_ex_mol_group;
|
||||
DAT::tdual_int_1d k_ex_mol_bit;
|
||||
|
||||
// data from NBinSSA class
|
||||
|
||||
int atoms_per_bin;
|
||||
DAT::tdual_int_1d k_bincount;
|
||||
DAT::tdual_int_2d k_bins;
|
||||
int ghosts_per_gbin;
|
||||
DAT::tdual_int_1d k_gbincount;
|
||||
DAT::tdual_int_2d k_gbins;
|
||||
int lbinxlo, lbinxhi, lbinylo, lbinyhi, lbinzlo, lbinzhi;
|
||||
|
||||
// data from NStencilSSA class
|
||||
|
||||
int nstencil;
|
||||
DAT::tdual_int_1d k_stencil; // # of J neighs for each I
|
||||
DAT::tdual_int_1d_3 k_stencilxyz;
|
||||
DAT::tdual_int_1d k_nstencil_ssa;
|
||||
int sx1, sy1, sz1;
|
||||
};
|
||||
|
||||
template<class DeviceType>
|
||||
class NPairSSAKokkosExecute
|
||||
{
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
public:
|
||||
NeighListKokkos<DeviceType> neigh_list;
|
||||
|
||||
// data from Neighbor class
|
||||
|
||||
const typename AT::t_xfloat_2d_randomread cutneighsq;
|
||||
|
||||
// exclusion data from Neighbor class
|
||||
|
||||
const int exclude;
|
||||
|
||||
const int nex_type;
|
||||
const typename AT::t_int_1d_const ex1_type,ex2_type;
|
||||
const typename AT::t_int_2d_const ex_type;
|
||||
|
||||
const int nex_group;
|
||||
const typename AT::t_int_1d_const ex1_group,ex2_group;
|
||||
const typename AT::t_int_1d_const ex1_bit,ex2_bit;
|
||||
|
||||
const int nex_mol;
|
||||
const typename AT::t_int_1d_const ex_mol_group;
|
||||
const typename AT::t_int_1d_const ex_mol_bit;
|
||||
|
||||
// data from NBinSSA class
|
||||
|
||||
const typename AT::t_int_1d bincount;
|
||||
const typename AT::t_int_1d_const c_bincount;
|
||||
typename AT::t_int_2d bins;
|
||||
typename AT::t_int_2d_const c_bins;
|
||||
const typename AT::t_int_1d gbincount;
|
||||
const typename AT::t_int_1d_const c_gbincount;
|
||||
typename AT::t_int_2d gbins;
|
||||
typename AT::t_int_2d_const c_gbins;
|
||||
const int lbinxlo, lbinxhi, lbinylo, lbinyhi, lbinzlo, lbinzhi;
|
||||
|
||||
|
||||
// data from NStencil class
|
||||
|
||||
const int nstencil;
|
||||
const int sx1, sy1, sz1;
|
||||
typename AT::t_int_1d d_stencil; // # of J neighs for each I
|
||||
typename AT::t_int_1d_3 d_stencilxyz;
|
||||
typename AT::t_int_1d d_nstencil_ssa;
|
||||
|
||||
// data from Atom class
|
||||
|
||||
const typename AT::t_x_array_randomread x;
|
||||
const typename AT::t_int_1d_const type,mask;
|
||||
const typename AT::t_tagint_1d_const molecule;
|
||||
const typename AT::t_tagint_1d_const tag;
|
||||
const typename AT::t_tagint_2d_const special;
|
||||
const typename AT::t_int_2d_const nspecial;
|
||||
const int molecular;
|
||||
int moltemplate;
|
||||
|
||||
int special_flag[4];
|
||||
|
||||
const int nbinx,nbiny,nbinz;
|
||||
const int mbinx,mbiny,mbinz;
|
||||
const int mbinxlo,mbinylo,mbinzlo;
|
||||
const X_FLOAT bininvx,bininvy,bininvz;
|
||||
X_FLOAT bboxhi[3],bboxlo[3];
|
||||
|
||||
const int nlocal;
|
||||
|
||||
typename AT::t_int_scalar resize;
|
||||
typename AT::t_int_scalar new_maxneighs;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_new_maxneighs;
|
||||
|
||||
const int xperiodic, yperiodic, zperiodic;
|
||||
const int xprd_half, yprd_half, zprd_half;
|
||||
|
||||
// SSA Work plan data structures
|
||||
int ssa_phaseCt;
|
||||
typename AT::t_int_1d d_ssa_phaseLen;
|
||||
typename AT::t_int_1d_3_const d_ssa_phaseOff;
|
||||
typename AT::t_int_2d d_ssa_itemLoc;
|
||||
typename AT::t_int_2d d_ssa_itemLen;
|
||||
int ssa_gphaseCt;
|
||||
typename AT::t_int_1d d_ssa_gphaseLen;
|
||||
typename AT::t_int_2d d_ssa_gitemLoc;
|
||||
typename AT::t_int_2d d_ssa_gitemLen;
|
||||
|
||||
NPairSSAKokkosExecute(
|
||||
const NeighListKokkos<DeviceType> &_neigh_list,
|
||||
const typename AT::t_xfloat_2d_randomread &_cutneighsq,
|
||||
const typename AT::t_int_1d &_bincount,
|
||||
const typename AT::t_int_2d &_bins,
|
||||
const typename AT::t_int_1d &_gbincount,
|
||||
const typename AT::t_int_2d &_gbins,
|
||||
const int _lbinxlo, const int _lbinxhi,
|
||||
const int _lbinylo, const int _lbinyhi,
|
||||
const int _lbinzlo, const int _lbinzhi,
|
||||
const int _nstencil, const int _sx1, const int _sy1, const int _sz1,
|
||||
const typename AT::t_int_1d &_d_stencil,
|
||||
const typename AT::t_int_1d_3 &_d_stencilxyz,
|
||||
const typename AT::t_int_1d &_d_nstencil_ssa,
|
||||
const int _ssa_phaseCt,
|
||||
const typename AT::t_int_1d &_d_ssa_phaseLen,
|
||||
const typename AT::t_int_1d_3 &_d_ssa_phaseOff,
|
||||
const typename AT::t_int_2d &_d_ssa_itemLoc,
|
||||
const typename AT::t_int_2d &_d_ssa_itemLen,
|
||||
const int _ssa_gphaseCt,
|
||||
const typename AT::t_int_1d &_d_ssa_gphaseLen,
|
||||
const typename AT::t_int_2d &_d_ssa_gitemLoc,
|
||||
const typename AT::t_int_2d &_d_ssa_gitemLen,
|
||||
const int _nlocal,
|
||||
const typename AT::t_x_array_randomread &_x,
|
||||
const typename AT::t_int_1d_const &_type,
|
||||
const typename AT::t_int_1d_const &_mask,
|
||||
const typename AT::t_tagint_1d_const &_molecule,
|
||||
const typename AT::t_tagint_1d_const &_tag,
|
||||
const typename AT::t_tagint_2d_const &_special,
|
||||
const typename AT::t_int_2d_const &_nspecial,
|
||||
const int &_molecular,
|
||||
const int & _nbinx,const int & _nbiny,const int & _nbinz,
|
||||
const int & _mbinx,const int & _mbiny,const int & _mbinz,
|
||||
const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
|
||||
const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
|
||||
const int & _exclude,const int & _nex_type,
|
||||
const typename AT::t_int_1d_const & _ex1_type,
|
||||
const typename AT::t_int_1d_const & _ex2_type,
|
||||
const typename AT::t_int_2d_const & _ex_type,
|
||||
const int & _nex_group,
|
||||
const typename AT::t_int_1d_const & _ex1_group,
|
||||
const typename AT::t_int_1d_const & _ex2_group,
|
||||
const typename AT::t_int_1d_const & _ex1_bit,
|
||||
const typename AT::t_int_1d_const & _ex2_bit,
|
||||
const int & _nex_mol,
|
||||
const typename AT::t_int_1d_const & _ex_mol_group,
|
||||
const typename AT::t_int_1d_const & _ex_mol_bit,
|
||||
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo,
|
||||
const int & _xperiodic, const int & _yperiodic, const int & _zperiodic,
|
||||
const int & _xprd_half, const int & _yprd_half, const int & _zprd_half):
|
||||
neigh_list(_neigh_list), cutneighsq(_cutneighsq),
|
||||
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
|
||||
gbincount(_gbincount),c_gbincount(_gbincount),gbins(_gbins),c_gbins(_gbins),
|
||||
lbinxlo(_lbinxlo),lbinxhi(_lbinxhi),
|
||||
lbinylo(_lbinylo),lbinyhi(_lbinyhi),
|
||||
lbinzlo(_lbinzlo),lbinzhi(_lbinzhi),
|
||||
nstencil(_nstencil),sx1(_sx1),sy1(_sy1),sz1(_sz1),
|
||||
d_stencil(_d_stencil),d_stencilxyz(_d_stencilxyz),d_nstencil_ssa(_d_nstencil_ssa),
|
||||
ssa_phaseCt(_ssa_phaseCt),
|
||||
d_ssa_phaseLen(_d_ssa_phaseLen),
|
||||
d_ssa_phaseOff(_d_ssa_phaseOff),
|
||||
d_ssa_itemLoc(_d_ssa_itemLoc),
|
||||
d_ssa_itemLen(_d_ssa_itemLen),
|
||||
ssa_gphaseCt(_ssa_gphaseCt),
|
||||
d_ssa_gphaseLen(_d_ssa_gphaseLen),
|
||||
d_ssa_gitemLoc(_d_ssa_gitemLoc),
|
||||
d_ssa_gitemLen(_d_ssa_gitemLen),
|
||||
nlocal(_nlocal),
|
||||
x(_x),type(_type),mask(_mask),molecule(_molecule),
|
||||
tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular),
|
||||
nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz),
|
||||
mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz),
|
||||
mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo),
|
||||
bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz),
|
||||
exclude(_exclude),nex_type(_nex_type),
|
||||
ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type),
|
||||
nex_group(_nex_group),
|
||||
ex1_group(_ex1_group),ex2_group(_ex2_group),
|
||||
ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),
|
||||
ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
|
||||
xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
|
||||
xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half) {
|
||||
|
||||
if (molecular == 2) moltemplate = 1;
|
||||
else moltemplate = 0;
|
||||
|
||||
bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2];
|
||||
bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2];
|
||||
|
||||
resize = typename AT::t_int_scalar("NPairSSAKokkosExecute::resize");
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_resize = Kokkos::create_mirror_view(resize);
|
||||
#else
|
||||
h_resize = resize;
|
||||
#endif
|
||||
h_resize() = 1;
|
||||
new_maxneighs = typename AT::
|
||||
t_int_scalar("NPairSSAKokkosExecute::new_maxneighs");
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs);
|
||||
#else
|
||||
h_new_maxneighs = new_maxneighs;
|
||||
#endif
|
||||
h_new_maxneighs() = neigh_list.maxneighs;
|
||||
};
|
||||
|
||||
~NPairSSAKokkosExecute() {neigh_list.copymode = 1;};
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
void build_locals_onePhase(const bool firstTry, int me, int workPhase) const;
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
void build_ghosts_onePhase(int workPhase) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
|
||||
{
|
||||
int ix,iy,iz;
|
||||
|
||||
if (x >= bboxhi[0])
|
||||
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
|
||||
else if (x >= bboxlo[0]) {
|
||||
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
|
||||
ix = MIN(ix,nbinx-1);
|
||||
} else
|
||||
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
|
||||
|
||||
if (y >= bboxhi[1])
|
||||
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
|
||||
else if (y >= bboxlo[1]) {
|
||||
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
|
||||
iy = MIN(iy,nbiny-1);
|
||||
} else
|
||||
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
|
||||
|
||||
if (z >= bboxhi[2])
|
||||
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
|
||||
else if (z >= bboxlo[2]) {
|
||||
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
|
||||
iz = MIN(iz,nbinz-1);
|
||||
} else
|
||||
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
|
||||
|
||||
i[0] = ix - mbinxlo;
|
||||
i[1] = iy - mbinylo;
|
||||
i[2] = iz - mbinzlo;
|
||||
|
||||
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int find_special(const int &i, const int &j) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int minimum_image_check(double dx, double dy, double dz) const {
|
||||
if (xperiodic && fabs(dx) > xprd_half) return 1;
|
||||
if (yperiodic && fabs(dy) > yprd_half) return 1;
|
||||
if (zperiodic && fabs(dz) > zprd_half) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
|
@ -0,0 +1,796 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Stan Moore (Sandia)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_vec.h"
|
||||
#include "comm.h"
|
||||
#include "update.h"
|
||||
#include "fix.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "memory.h"
|
||||
#include "modify.h"
|
||||
#include "pair_dpd_fdt_energy_kokkos.h"
|
||||
#include "error.h"
|
||||
#include "atom_masks.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define EPSILON 1.0e-10
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
PairDPDfdtEnergyKokkos<DeviceType>::PairDPDfdtEnergyKokkos(LAMMPS *lmp) :
|
||||
PairDPDfdtEnergy(lmp),
|
||||
#ifdef DPD_USE_RAN_MARS
|
||||
rand_pool(0 /* unused */, lmp)
|
||||
#else
|
||||
rand_pool()
|
||||
#endif
|
||||
{
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
datamask_read = EMPTY_MASK;
|
||||
datamask_modify = EMPTY_MASK;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
PairDPDfdtEnergyKokkos<DeviceType>::~PairDPDfdtEnergyKokkos()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
memory->destroy_kokkos(k_eatom,eatom);
|
||||
memory->destroy_kokkos(k_vatom,vatom);
|
||||
|
||||
if (allocated) {
|
||||
memory->destroy_kokkos(k_duCond,duCond);
|
||||
memory->destroy_kokkos(k_duMech,duMech);
|
||||
}
|
||||
|
||||
memory->destroy_kokkos(k_cutsq,cutsq);
|
||||
|
||||
#ifdef DPD_USE_RAN_MARS
|
||||
rand_pool.destroy();
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init specific to this pair style
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairDPDfdtEnergyKokkos<DeviceType>::init_style()
|
||||
{
|
||||
PairDPDfdtEnergy::init_style();
|
||||
|
||||
// irequest = neigh request made by parent class
|
||||
|
||||
neighflag = lmp->kokkos->neighflag;
|
||||
int irequest = neighbor->nrequest - 1;
|
||||
|
||||
neighbor->requests[irequest]->
|
||||
kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
|
||||
!Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
|
||||
neighbor->requests[irequest]->
|
||||
kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
|
||||
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with dpd/fdt/energy/kk");
|
||||
}
|
||||
|
||||
#ifdef DPD_USE_RAN_MARS
|
||||
rand_pool.init(random,seed);
|
||||
#else
|
||||
rand_pool.init(seed + comm->me,DeviceType::max_hardware_threads());
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__)
/* ----------------------------------------------------------------------
   CUDA specialization of init_style to properly call rand_pool.init().
   It mirrors the generic version; the only difference is the fixed
   thread count handed to rand_pool.init() in place of
   max_hardware_threads().
------------------------------------------------------------------------- */

template<>
void PairDPDfdtEnergyKokkos<Kokkos::Cuda>::init_style()
{
  PairDPDfdtEnergy::init_style();

  neighflag = lmp->kokkos->neighflag;

  // irequest = neigh request made by parent class
  const int irequest = neighbor->nrequest - 1;

  const bool onDevice = Kokkos::Impl::is_same<Kokkos::Cuda,LMPDeviceType>::value;
  const bool onHost = Kokkos::Impl::is_same<Kokkos::Cuda,LMPHostType>::value && !onDevice;
  neighbor->requests[irequest]->kokkos_host = onHost;
  neighbor->requests[irequest]->kokkos_device = onDevice;

  const bool wantFull = (neighflag == FULL);
  const bool wantHalf = (neighflag == HALF || neighflag == HALFTHREAD);
  if (wantFull || wantHalf) {
    neighbor->requests[irequest]->full = wantFull ? 1 : 0;
    neighbor->requests[irequest]->half = wantFull ? 0 : 1;
  } else {
    error->all(FLERR,"Cannot use chosen neighbor list style with dpd/fdt/energy/kk");
  }

#ifdef DPD_USE_RAN_MARS
  rand_pool.init(random,seed);
#else
  rand_pool.init(seed + comm->me,4*32768 /*fake max_hardware_threads()*/);
#endif
}
#endif
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairDPDfdtEnergyKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
{
|
||||
copymode = 1;
|
||||
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
if (eflag || vflag) ev_setup(eflag,vflag,0);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
|
||||
// reallocate per-atom arrays if necessary
|
||||
|
||||
if (eflag_atom) {
|
||||
memory->destroy_kokkos(k_eatom,eatom);
|
||||
memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
|
||||
d_eatom = k_eatom.template view<DeviceType>();
|
||||
}
|
||||
if (vflag_atom) {
|
||||
memory->destroy_kokkos(k_vatom,vatom);
|
||||
memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
|
||||
d_vatom = k_vatom.template view<DeviceType>();
|
||||
}
|
||||
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
v = atomKK->k_v.view<DeviceType>();
|
||||
f = atomKK->k_f.view<DeviceType>();
|
||||
type = atomKK->k_type.view<DeviceType>();
|
||||
mass = atomKK->k_mass.view<DeviceType>();
|
||||
rmass = atomKK->rmass;
|
||||
dpdTheta = atomKK->k_dpdTheta.view<DeviceType>();
|
||||
|
||||
k_cutsq.template sync<DeviceType>();
|
||||
k_params.template sync<DeviceType>();
|
||||
atomKK->sync(execution_space,X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK);
|
||||
if (evflag) atomKK->modified(execution_space,F_MASK | ENERGY_MASK | VIRIAL_MASK);
|
||||
else atomKK->modified(execution_space,F_MASK);
|
||||
|
||||
special_lj[0] = force->special_lj[0];
|
||||
special_lj[1] = force->special_lj[1];
|
||||
special_lj[2] = force->special_lj[2];
|
||||
special_lj[3] = force->special_lj[3];
|
||||
|
||||
nlocal = atom->nlocal;
|
||||
int nghost = atom->nghost;
|
||||
int newton_pair = force->newton_pair;
|
||||
dtinvsqrt = 1.0/sqrt(update->dt);
|
||||
|
||||
int inum = list->inum;
|
||||
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
|
||||
d_numneigh = k_list->d_numneigh;
|
||||
d_neighbors = k_list->d_neighbors;
|
||||
d_ilist = k_list->d_ilist;
|
||||
|
||||
boltz = force->boltz;
|
||||
ftm2v = force->ftm2v;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
EV_FLOAT ev;
|
||||
|
||||
if (splitFDT_flag) {
|
||||
if (!a0_is_zero) {
|
||||
if(atom->ntypes > MAX_TYPES_STACKPARAMS) {
|
||||
if (neighflag == HALF) {
|
||||
if (newton_pair) {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALF,1,1,false> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALF,1,0,false> >(0,inum),*this);
|
||||
} else {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALF,0,1,false> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALF,0,0,false> >(0,inum),*this);
|
||||
}
|
||||
} else if (neighflag == HALFTHREAD) {
|
||||
if (newton_pair) {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALFTHREAD,1,1,false> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALFTHREAD,1,0,false> >(0,inum),*this);
|
||||
} else {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALFTHREAD,0,1,false> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALFTHREAD,0,0,false> >(0,inum),*this);
|
||||
}
|
||||
} else if (neighflag == FULL) {
|
||||
if (newton_pair) {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<FULL,1,1,false> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<FULL,1,0,false> >(0,inum),*this);
|
||||
} else {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<FULL,0,1,false> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<FULL,0,0,false> >(0,inum),*this);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (neighflag == HALF) {
|
||||
if (newton_pair) {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALF,1,1,true> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALF,1,0,true> >(0,inum),*this);
|
||||
} else {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALF,0,1,true> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALF,0,0,true> >(0,inum),*this);
|
||||
}
|
||||
} else if (neighflag == HALFTHREAD) {
|
||||
if (newton_pair) {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALFTHREAD,1,1,true> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALFTHREAD,1,0,true> >(0,inum),*this);
|
||||
} else {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALFTHREAD,0,1,true> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALFTHREAD,0,0,true> >(0,inum),*this);
|
||||
}
|
||||
} else if (neighflag == FULL) {
|
||||
if (newton_pair) {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<FULL,1,1,true> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<FULL,1,0,true> >(0,inum),*this);
|
||||
} else {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<FULL,0,1,true> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<FULL,0,0,true> >(0,inum),*this);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
||||
// Allocate memory for duCond and duMech
|
||||
if (allocated) {
|
||||
memory->destroy_kokkos(k_duCond,duCond);
|
||||
memory->destroy_kokkos(k_duMech,duMech);
|
||||
}
|
||||
memory->create_kokkos(k_duCond,duCond,nlocal+nghost,"pair:duCond");
|
||||
memory->create_kokkos(k_duMech,duMech,nlocal+nghost,"pair:duMech");
|
||||
d_duCond = k_duCond.view<DeviceType>();
|
||||
d_duMech = k_duMech.view<DeviceType>();
|
||||
h_duCond = k_duCond.h_view;
|
||||
h_duMech = k_duMech.h_view;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyZero>(0,nlocal+nghost),*this);
|
||||
|
||||
atomKK->sync(execution_space,V_MASK | DPDTHETA_MASK | RMASS_MASK);
|
||||
atomKK->k_mass.sync<DeviceType>();
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
if(atom->ntypes > MAX_TYPES_STACKPARAMS) {
|
||||
if (neighflag == HALF) {
|
||||
if (newton_pair) {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALF,1,1,false> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALF,1,0,false> >(0,inum),*this);
|
||||
} else {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALF,0,1,false> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALF,0,0,false> >(0,inum),*this);
|
||||
}
|
||||
} else if (neighflag == HALFTHREAD) {
|
||||
if (newton_pair) {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALFTHREAD,1,1,false> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALFTHREAD,1,0,false> >(0,inum),*this);
|
||||
} else {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALFTHREAD,0,1,false> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALFTHREAD,0,0,false> >(0,inum),*this);
|
||||
}
|
||||
} else if (neighflag == FULL) {
|
||||
if (newton_pair) {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<FULL,1,1,false> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<FULL,1,0,false> >(0,inum),*this);
|
||||
} else {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<FULL,0,1,false> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<FULL,0,0,false> >(0,inum),*this);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (neighflag == HALF) {
|
||||
if (newton_pair) {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALF,1,1,false> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALF,1,0,false> >(0,inum),*this);
|
||||
} else {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALF,0,1,false> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALF,0,0,false> >(0,inum),*this);
|
||||
}
|
||||
} else if (neighflag == HALFTHREAD) {
|
||||
if (newton_pair) {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALFTHREAD,1,1,false> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALFTHREAD,1,0,false> >(0,inum),*this);
|
||||
} else {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALFTHREAD,0,1,false> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALFTHREAD,0,0,false> >(0,inum),*this);
|
||||
}
|
||||
} else if (neighflag == FULL) {
|
||||
if (newton_pair) {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<FULL,1,1,false> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<FULL,1,0,false> >(0,inum),*this);
|
||||
} else {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<FULL,0,1,false> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<FULL,0,0,false> >(0,inum),*this);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Communicate the ghost delta energies to the locally owned atoms
|
||||
|
||||
// this memory transfer can be removed when fix_dpd_fdt_energy_kokkos is added
|
||||
k_duCond.template modify<DeviceType>();
|
||||
k_duCond.template sync<LMPHostType>();
|
||||
k_duMech.template modify<DeviceType>();
|
||||
k_duMech.template sync<LMPHostType>();
|
||||
comm->reverse_comm_pair(this);
|
||||
}
|
||||
|
||||
if (eflag_global) eng_vdwl += ev.evdwl;
|
||||
if (vflag_global) {
|
||||
virial[0] += ev.v[0];
|
||||
virial[1] += ev.v[1];
|
||||
virial[2] += ev.v[2];
|
||||
virial[3] += ev.v[3];
|
||||
virial[4] += ev.v[4];
|
||||
virial[5] += ev.v[5];
|
||||
}
|
||||
|
||||
if (vflag_fdotr) pair_virial_fdotr_compute(this);
|
||||
|
||||
if (eflag_atom) {
|
||||
k_eatom.template modify<DeviceType>();
|
||||
k_eatom.template sync<LMPHostType>();
|
||||
}
|
||||
|
||||
if (vflag_atom) {
|
||||
k_vatom.template modify<DeviceType>();
|
||||
k_vatom.template sync<LMPHostType>();
|
||||
}
|
||||
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairDPDfdtEnergyKokkos<DeviceType>::operator()(TagPairDPDfdtEnergyZero, const int &ii) const {
|
||||
d_duCond[ii] = 0.0;
|
||||
d_duMech[ii] = 0.0;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairDPDfdtEnergyKokkos<DeviceType>::operator()(TagPairDPDfdtEnergyComputeSplit<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>, const int &ii, EV_FLOAT& ev) const {
|
||||
|
||||
// The f array is atomic for Half/Thread neighbor style
|
||||
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
|
||||
|
||||
int i,j,jj,jnum,itype,jtype;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
|
||||
double rsq,r,rinv,wd,wr,factor_dpd;
|
||||
|
||||
i = d_ilist[ii];
|
||||
xtmp = x(i,0);
|
||||
ytmp = x(i,1);
|
||||
ztmp = x(i,2);
|
||||
itype = type[i];
|
||||
jnum = d_numneigh[i];
|
||||
|
||||
double fx_i = 0.0;
|
||||
double fy_i = 0.0;
|
||||
double fz_i = 0.0;
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = d_neighbors(i,jj);
|
||||
factor_dpd = special_lj[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
|
||||
delx = xtmp - x(j,0);
|
||||
dely = ytmp - x(j,1);
|
||||
delz = ztmp - x(j,2);
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
jtype = type[j];
|
||||
|
||||
double cutsq_ij = STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype);
|
||||
if (rsq < cutsq_ij) {
|
||||
r = sqrt(rsq);
|
||||
if (r < EPSILON) continue; // r can be 0.0 in DPD systems
|
||||
rinv = 1.0/r;
|
||||
double cut_ij = STACKPARAMS?m_params[itype][jtype].cut:params(itype,jtype).cut;
|
||||
wr = 1.0 - r/cut_ij;
|
||||
wd = wr*wr;
|
||||
|
||||
// conservative force = a0 * wr
|
||||
double a0_ij = STACKPARAMS?m_params[itype][jtype].a0:params(itype,jtype).a0;
|
||||
fpair = a0_ij*wr;
|
||||
fpair *= factor_dpd*rinv;
|
||||
|
||||
fx_i += delx*fpair;
|
||||
fy_i += dely*fpair;
|
||||
fz_i += delz*fpair;
|
||||
if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) {
|
||||
a_f(j,0) -= delx*fpair;
|
||||
a_f(j,1) -= dely*fpair;
|
||||
a_f(j,2) -= delz*fpair;
|
||||
}
|
||||
|
||||
if (eflag) {
|
||||
// unshifted eng of conservative term:
|
||||
// evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/d_cut(itype,jtype));
|
||||
// eng shifted to 0.0 at cutoff
|
||||
evdwl = 0.5*a0_ij*cut_ij * wd;
|
||||
evdwl *= factor_dpd;
|
||||
if (EVFLAG)
|
||||
ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR||(j<nlocal)))?1.0:0.5)*evdwl;
|
||||
}
|
||||
|
||||
if (EVFLAG) this->template ev_tally<NEIGHFLAG,NEWTON_PAIR>(ev,i,j,evdwl,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
|
||||
a_f(i,0) += fx_i;
|
||||
a_f(i,1) += fy_i;
|
||||
a_f(i,2) += fz_i;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairDPDfdtEnergyKokkos<DeviceType>::operator()(TagPairDPDfdtEnergyComputeSplit<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>, const int &ii) const {
|
||||
EV_FLOAT ev;
|
||||
this->template operator()<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(TagPairDPDfdtEnergyComputeSplit<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>(), ii, ev);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairDPDfdtEnergyKokkos<DeviceType>::operator()(TagPairDPDfdtEnergyComputeNoSplit<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>, const int &ii, EV_FLOAT& ev) const {
|
||||
|
||||
// These array are atomic for Half/Thread neighbor style
|
||||
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
|
||||
Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_duCond = d_duCond;
|
||||
Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_duMech = d_duMech;
|
||||
|
||||
int i,j,jj,jnum,itype,jtype;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
|
||||
double vxtmp,vytmp,vztmp,delvx,delvy,delvz;
|
||||
double rsq,r,rinv,wd,wr,factor_dpd,uTmp;
|
||||
double dot,randnum;
|
||||
|
||||
double kappa_ij, alpha_ij, theta_ij, gamma_ij;
|
||||
double mass_i, mass_j;
|
||||
double massinv_i, massinv_j;
|
||||
double randPair, mu_ij;
|
||||
|
||||
rand_type rand_gen = rand_pool.get_state();
|
||||
|
||||
i = d_ilist[ii];
|
||||
xtmp = x(i,0);
|
||||
ytmp = x(i,1);
|
||||
ztmp = x(i,2);
|
||||
vxtmp = v(i,0);
|
||||
vytmp = v(i,1);
|
||||
vztmp = v(i,2);
|
||||
itype = type[i];
|
||||
jnum = d_numneigh[i];
|
||||
|
||||
double fx_i = 0.0;
|
||||
double fy_i = 0.0;
|
||||
double fz_i = 0.0;
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = d_neighbors(i,jj);
|
||||
factor_dpd = special_lj[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
|
||||
delx = xtmp - x(j,0);
|
||||
dely = ytmp - x(j,1);
|
||||
delz = ztmp - x(j,2);
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
jtype = type[j];
|
||||
|
||||
double cutsq_ij = STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype);
|
||||
if (rsq < cutsq_ij) {
|
||||
r = sqrt(rsq);
|
||||
if (r < EPSILON) continue; // r can be 0.0 in DPD systems
|
||||
rinv = 1.0/r;
|
||||
double cut_ij = STACKPARAMS?m_params[itype][jtype].cut:params(itype,jtype).cut;
|
||||
wr = 1.0 - r/cut_ij;
|
||||
wd = wr*wr;
|
||||
|
||||
delvx = vxtmp - v(j,0);
|
||||
delvy = vytmp - v(j,1);
|
||||
delvz = vztmp - v(j,2);
|
||||
dot = delx*delvx + dely*delvy + delz*delvz;
|
||||
randnum = rand_gen.normal();
|
||||
|
||||
// Compute the current temperature
|
||||
theta_ij = 0.5*(1.0/dpdTheta[i] + 1.0/dpdTheta[j]);
|
||||
theta_ij = 1.0/theta_ij;
|
||||
|
||||
double sigma_ij = STACKPARAMS?m_params[itype][jtype].sigma:params(itype,jtype).sigma;
|
||||
gamma_ij = sigma_ij*sigma_ij
|
||||
/ (2.0*boltz*theta_ij);
|
||||
|
||||
// conservative force = a0 * wr
|
||||
// drag force = -gamma * wr^2 * (delx dot delv) / r
|
||||
// random force = sigma * wr * rnd * dtinvsqrt;
|
||||
|
||||
double a0_ij = STACKPARAMS?m_params[itype][jtype].a0:params(itype,jtype).a0;
|
||||
fpair = a0_ij*wr;
|
||||
fpair -= gamma_ij*wd*dot*rinv;
|
||||
fpair += sigma_ij*wr*randnum*dtinvsqrt;
|
||||
fpair *= factor_dpd*rinv;
|
||||
|
||||
fx_i += delx*fpair;
|
||||
fy_i += dely*fpair;
|
||||
fz_i += delz*fpair;
|
||||
if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) {
|
||||
a_f(j,0) -= delx*fpair;
|
||||
a_f(j,1) -= dely*fpair;
|
||||
a_f(j,2) -= delz*fpair;
|
||||
}
|
||||
|
||||
if (rmass) {
|
||||
mass_i = rmass[i];
|
||||
mass_j = rmass[j];
|
||||
} else {
|
||||
mass_i = mass[itype];
|
||||
mass_j = mass[jtype];
|
||||
}
|
||||
massinv_i = 1.0 / mass_i;
|
||||
massinv_j = 1.0 / mass_j;
|
||||
|
||||
// Compute the mechanical and conductive energy, uMech and uCond
|
||||
mu_ij = massinv_i + massinv_j;
|
||||
mu_ij *= ftm2v;
|
||||
|
||||
uTmp = gamma_ij*wd*rinv*rinv*dot*dot
|
||||
- 0.5*sigma_ij*sigma_ij*mu_ij*wd;
|
||||
uTmp -= sigma_ij*wr*rinv*dot*randnum*dtinvsqrt;
|
||||
uTmp *= 0.5;
|
||||
|
||||
a_duMech[i] += uTmp;
|
||||
if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) {
|
||||
a_duMech[j] += uTmp;
|
||||
}
|
||||
|
||||
// Compute uCond
|
||||
randnum = rand_gen.normal();
|
||||
kappa_ij = STACKPARAMS?m_params[itype][jtype].kappa:params(itype,jtype).kappa;
|
||||
alpha_ij = sqrt(2.0*boltz*kappa_ij);
|
||||
randPair = alpha_ij*wr*randnum*dtinvsqrt;
|
||||
|
||||
uTmp = kappa_ij*(1.0/dpdTheta[i] - 1.0/dpdTheta[j])*wd;
|
||||
uTmp += randPair;
|
||||
|
||||
a_duCond[i] += uTmp;
|
||||
if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) {
|
||||
a_duCond[j] -= uTmp;
|
||||
}
|
||||
|
||||
if (eflag) {
|
||||
// unshifted eng of conservative term:
|
||||
// evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/d_cut(itype,jtype));
|
||||
// eng shifted to 0.0 at cutoff
|
||||
evdwl = 0.5*a0_ij*cut_ij * wd;
|
||||
evdwl *= factor_dpd;
|
||||
if (EVFLAG)
|
||||
ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR||(j<nlocal)))?1.0:0.5)*evdwl;
|
||||
}
|
||||
|
||||
if (EVFLAG) this->template ev_tally<NEIGHFLAG,NEWTON_PAIR>(ev,i,j,evdwl,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
|
||||
a_f(i,0) += fx_i;
|
||||
a_f(i,1) += fy_i;
|
||||
a_f(i,2) += fz_i;
|
||||
|
||||
rand_pool.free_state(rand_gen);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairDPDfdtEnergyKokkos<DeviceType>::operator()(TagPairDPDfdtEnergyComputeNoSplit<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>, const int &ii) const {
|
||||
EV_FLOAT ev;
|
||||
this->template operator()<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(TagPairDPDfdtEnergyComputeNoSplit<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>(), ii, ev);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
allocate all arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairDPDfdtEnergyKokkos<DeviceType>::allocate()
|
||||
{
|
||||
PairDPDfdtEnergy::allocate();
|
||||
|
||||
int n = atom->ntypes;
|
||||
int nlocal = atom->nlocal;
|
||||
int nghost = atom->nghost;
|
||||
|
||||
memory->destroy(cutsq);
|
||||
memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq");
|
||||
d_cutsq = k_cutsq.template view<DeviceType>();
|
||||
|
||||
k_params = Kokkos::DualView<params_dpd**,Kokkos::LayoutRight,DeviceType>("PairDPDfdtEnergy::params",n+1,n+1);
|
||||
params = k_params.template view<DeviceType>();
|
||||
|
||||
if (!splitFDT_flag) {
|
||||
memory->destroy(duCond);
|
||||
memory->destroy(duMech);
|
||||
memory->create_kokkos(k_duCond,duCond,nlocal+nghost+1,"pair:duCond");
|
||||
memory->create_kokkos(k_duMech,duMech,nlocal+nghost+1,"pair:duMech");
|
||||
d_duCond = k_duCond.view<DeviceType>();
|
||||
d_duMech = k_duMech.view<DeviceType>();
|
||||
h_duCond = k_duCond.h_view;
|
||||
h_duMech = k_duMech.h_view;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init for one type pair i,j and corresponding j,i
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
double PairDPDfdtEnergyKokkos<DeviceType>::init_one(int i, int j)
|
||||
{
|
||||
double cutone = PairDPDfdtEnergy::init_one(i,j);
|
||||
|
||||
k_params.h_view(i,j).cut = cut[i][j];
|
||||
k_params.h_view(i,j).a0 = a0[i][j];
|
||||
k_params.h_view(i,j).sigma = sigma[i][j];
|
||||
k_params.h_view(i,j).kappa = kappa[i][j];
|
||||
k_params.h_view(j,i) = k_params.h_view(i,j);
|
||||
if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
|
||||
m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
|
||||
m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone;
|
||||
}
|
||||
|
||||
k_cutsq.h_view(i,j) = cutone*cutone;
|
||||
k_cutsq.h_view(j,i) = k_cutsq.h_view(i,j);
|
||||
k_cutsq.template modify<LMPHostType>();
|
||||
k_params.template modify<LMPHostType>();
|
||||
|
||||
return cutone;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairDPDfdtEnergyKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j,
|
||||
const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
|
||||
const F_FLOAT &dely, const F_FLOAT &delz) const
|
||||
{
|
||||
const int EFLAG = eflag;
|
||||
const int VFLAG = vflag_either;
|
||||
|
||||
// The eatom and vatom arrays are atomic for Half/Thread neighbor style
|
||||
Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
|
||||
Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
|
||||
|
||||
if (EFLAG) {
|
||||
if (eflag_atom) {
|
||||
const E_FLOAT epairhalf = 0.5 * epair;
|
||||
if (NEIGHFLAG!=FULL) {
|
||||
if (NEWTON_PAIR || i < nlocal) v_eatom[i] += epairhalf;
|
||||
if (NEWTON_PAIR || j < nlocal) v_eatom[j] += epairhalf;
|
||||
} else {
|
||||
v_eatom[i] += epairhalf;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (VFLAG) {
|
||||
const E_FLOAT v0 = delx*delx*fpair;
|
||||
const E_FLOAT v1 = dely*dely*fpair;
|
||||
const E_FLOAT v2 = delz*delz*fpair;
|
||||
const E_FLOAT v3 = delx*dely*fpair;
|
||||
const E_FLOAT v4 = delx*delz*fpair;
|
||||
const E_FLOAT v5 = dely*delz*fpair;
|
||||
|
||||
if (vflag_global) {
|
||||
if (NEIGHFLAG!=FULL) {
|
||||
if (NEWTON_PAIR || i < nlocal) {
|
||||
ev.v[0] += 0.5*v0;
|
||||
ev.v[1] += 0.5*v1;
|
||||
ev.v[2] += 0.5*v2;
|
||||
ev.v[3] += 0.5*v3;
|
||||
ev.v[4] += 0.5*v4;
|
||||
ev.v[5] += 0.5*v5;
|
||||
}
|
||||
if (NEWTON_PAIR || j < nlocal) {
|
||||
ev.v[0] += 0.5*v0;
|
||||
ev.v[1] += 0.5*v1;
|
||||
ev.v[2] += 0.5*v2;
|
||||
ev.v[3] += 0.5*v3;
|
||||
ev.v[4] += 0.5*v4;
|
||||
ev.v[5] += 0.5*v5;
|
||||
}
|
||||
} else {
|
||||
ev.v[0] += 0.5*v0;
|
||||
ev.v[1] += 0.5*v1;
|
||||
ev.v[2] += 0.5*v2;
|
||||
ev.v[3] += 0.5*v3;
|
||||
ev.v[4] += 0.5*v4;
|
||||
ev.v[5] += 0.5*v5;
|
||||
}
|
||||
}
|
||||
|
||||
if (vflag_atom) {
|
||||
if (NEIGHFLAG!=FULL) {
|
||||
if (NEWTON_PAIR || i < nlocal) {
|
||||
v_vatom(i,0) += 0.5*v0;
|
||||
v_vatom(i,1) += 0.5*v1;
|
||||
v_vatom(i,2) += 0.5*v2;
|
||||
v_vatom(i,3) += 0.5*v3;
|
||||
v_vatom(i,4) += 0.5*v4;
|
||||
v_vatom(i,5) += 0.5*v5;
|
||||
}
|
||||
if (NEWTON_PAIR || j < nlocal) {
|
||||
v_vatom(j,0) += 0.5*v0;
|
||||
v_vatom(j,1) += 0.5*v1;
|
||||
v_vatom(j,2) += 0.5*v2;
|
||||
v_vatom(j,3) += 0.5*v3;
|
||||
v_vatom(j,4) += 0.5*v4;
|
||||
v_vatom(j,5) += 0.5*v5;
|
||||
}
|
||||
} else {
|
||||
v_vatom(i,0) += 0.5*v0;
|
||||
v_vatom(i,1) += 0.5*v1;
|
||||
v_vatom(i,2) += 0.5*v2;
|
||||
v_vatom(i,3) += 0.5*v3;
|
||||
v_vatom(i,4) += 0.5*v4;
|
||||
v_vatom(i,5) += 0.5*v5;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int PairDPDfdtEnergyKokkos<DeviceType>::sbmask(const int& j) const {
|
||||
return j >> SBBITS & 3;
|
||||
}
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class PairDPDfdtEnergyKokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class PairDPDfdtEnergyKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
|
@ -0,0 +1,182 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(dpd/fdt/energy/kk,PairDPDfdtEnergyKokkos<LMPDeviceType>)
|
||||
PairStyle(dpd/fdt/energy/kk/device,PairDPDfdtEnergyKokkos<LMPDeviceType>)
|
||||
PairStyle(dpd/fdt/energy/kk/host,PairDPDfdtEnergyKokkos<LMPHostType>)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_DPD_FDT_ENERGY_KOKKOS_H
|
||||
#define LMP_PAIR_DPD_FDT_ENERGY_KOKKOS_H
|
||||
|
||||
// Random number generator selection.  If the build predefines none of
// DPD_USE_RAN_MARS, DPD_USE_Random_XorShift64 or
// DPD_USE_Random_XorShift1024, default to the 64-bit XorShift pool.
// All three tests must use the DPD_USE_ prefix: the #elif chain that
// declares rand_pool checks DPD_USE_Random_XorShift64 before
// DPD_USE_Random_XorShift1024, so defining the default on top of an
// explicit 1024 request would silently select the 64-bit pool.
#if !defined(DPD_USE_RAN_MARS) && !defined(DPD_USE_Random_XorShift64) && !defined(DPD_USE_Random_XorShift1024)
#define DPD_USE_Random_XorShift64
#endif
|
||||
|
||||
#include "pair_dpd_fdt_energy.h"
|
||||
#include "pair_kokkos.h"
|
||||
#include "kokkos_type.h"
|
||||
#ifdef DPD_USE_RAN_MARS
|
||||
#include "rand_pool_wrap_kokkos.h"
|
||||
#else
|
||||
#include "Kokkos_Random.hpp"
|
||||
#endif
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// Empty tag types used to select an operator() overload of
// PairDPDfdtEnergyKokkos through a Kokkos::RangePolicy.

// selects the kernel that clears duCond/duMech
struct TagPairDPDfdtEnergyZero {};

// selects the conservative-force-only pair kernel
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
struct TagPairDPDfdtEnergyComputeSplit {};

// selects the pair kernel that also accumulates duCond/duMech
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
struct TagPairDPDfdtEnergyComputeNoSplit {};
|
||||
|
||||
template<class DeviceType>
|
||||
class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy {
|
||||
public:
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
typedef EV_FLOAT value_type;
|
||||
|
||||
PairDPDfdtEnergyKokkos(class LAMMPS *);
|
||||
virtual ~PairDPDfdtEnergyKokkos();
|
||||
virtual void compute(int, int);
|
||||
void init_style();
|
||||
double init_one(int, int);
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairDPDfdtEnergyZero, const int&) const;
|
||||
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairDPDfdtEnergyComputeSplit<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>, const int&, EV_FLOAT&) const;
|
||||
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairDPDfdtEnergyComputeSplit<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>, const int&) const;
|
||||
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairDPDfdtEnergyComputeNoSplit<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>, const int&, EV_FLOAT&) const;
|
||||
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairDPDfdtEnergyComputeNoSplit<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>, const int&) const;
|
||||
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
|
||||
const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
|
||||
const F_FLOAT &dely, const F_FLOAT &delz) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int sbmask(const int& j) const;
|
||||
|
||||
struct params_dpd {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
params_dpd(){cut=0;a0=0;sigma=0;kappa=0;};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
params_dpd(int i){cut=0;a0=0;sigma=0;kappa=0;};
|
||||
F_FLOAT cut,a0,sigma,kappa;
|
||||
};
|
||||
|
||||
DAT::tdual_efloat_1d k_duCond,k_duMech;
|
||||
|
||||
#ifdef DPD_USE_RAN_MARS
|
||||
RandPoolWrap rand_pool;
|
||||
typedef RandWrap rand_type;
|
||||
#elif defined(DPD_USE_Random_XorShift64)
|
||||
Kokkos::Random_XorShift64_Pool<DeviceType> rand_pool;
|
||||
typedef typename Kokkos::Random_XorShift64_Pool<DeviceType>::generator_type rand_type;
|
||||
#elif defined(DPD_USE_Random_XorShift1024)
|
||||
Kokkos::Random_XorShift1024_Pool<DeviceType> rand_pool;
|
||||
typedef typename Kokkos::Random_XorShift1024_Pool<DeviceType>::generator_type rand_type;
|
||||
#endif
|
||||
|
||||
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
|
||||
|
||||
protected:
|
||||
int eflag,vflag;
|
||||
int nlocal,neighflag;
|
||||
double dtinvsqrt;
|
||||
double boltz,ftm2v;
|
||||
double special_lj[4];
|
||||
|
||||
virtual void allocate();
|
||||
|
||||
Kokkos::DualView<params_dpd**,Kokkos::LayoutRight,DeviceType> k_params;
|
||||
typename Kokkos::DualView<params_dpd**,
|
||||
Kokkos::LayoutRight,DeviceType>::t_dev_const_um params;
|
||||
// hardwired to space for MAX_TYPES_STACKPARAMS (12) atom types
|
||||
params_dpd m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
|
||||
|
||||
F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
|
||||
typename ArrayTypes<DeviceType>::t_x_array_randomread x;
|
||||
typename ArrayTypes<DeviceType>::t_x_array c_x;
|
||||
typename ArrayTypes<DeviceType>::t_v_array_randomread v;
|
||||
typename ArrayTypes<DeviceType>::t_f_array f;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
|
||||
typename ArrayTypes<DeviceType>::t_float_1d_randomread mass;
|
||||
double *rmass;
|
||||
typename AT::t_efloat_1d dpdTheta;
|
||||
typename AT::t_efloat_1d d_duCond,d_duMech;
|
||||
HAT::t_efloat_1d h_duCond,h_duMech;
|
||||
|
||||
DAT::tdual_efloat_1d k_eatom;
|
||||
DAT::tdual_virial_array k_vatom;
|
||||
typename AT::t_efloat_1d d_eatom;
|
||||
typename AT::t_virial_array d_vatom;
|
||||
|
||||
typename AT::t_neighbors_2d d_neighbors;
|
||||
typename AT::t_int_1d_randomread d_ilist;
|
||||
typename AT::t_int_1d_randomread d_numneigh;
|
||||
|
||||
friend void pair_virial_fdotr_compute<PairDPDfdtEnergyKokkos>(PairDPDfdtEnergyKokkos*);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Illegal ... command
|
||||
|
||||
Self-explanatory. Check the input script syntax and compare to the
|
||||
documentation for the command. You can use -echo screen as a
|
||||
command-line option when running LAMMPS to see the offending line.
|
||||
|
||||
E: Incorrect args for pair coefficients
|
||||
|
||||
Self-explanatory. Check the input script or data file.
|
||||
|
||||
E: Pair dpd/fdt/energy requires ghost atoms store velocity
|
||||
|
||||
Use the communicate vel yes command to enable this.
|
||||
|
||||
E: Pair dpd/fdt/energy requires newton pair on
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: All pair coeffs are not set
|
||||
|
||||
All pair coefficients must be set in the data file or by the
|
||||
pair_coeff command before running a simulation.
|
||||
|
||||
*/
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,280 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(exp6/rx/kk,PairExp6rxKokkos<LMPDeviceType>)
|
||||
PairStyle(exp6/rx/kk/device,PairExp6rxKokkos<LMPDeviceType>)
|
||||
PairStyle(exp6/rx/kk/host,PairExp6rxKokkos<LMPHostType>)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_EXP6_RX_KOKKOS_H
|
||||
#define LMP_PAIR_EXP6_RX_KOKKOS_H
|
||||
|
||||
#include "pair_exp6_rx.h"
|
||||
#include "kokkos_type.h"
|
||||
#include "pair_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// Create a structure to hold the parameter data for all
|
||||
// local and neighbor particles. Pack inside this struct
|
||||
// to avoid any name clashes.
|
||||
|
||||
template<class DeviceType>
|
||||
struct PairExp6ParamDataTypeKokkos
|
||||
{
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
int n;
|
||||
typename AT::t_float_1d epsilon1, alpha1, rm1, mixWtSite1,
|
||||
epsilon2, alpha2, rm2, mixWtSite2,
|
||||
epsilonOld1, alphaOld1, rmOld1, mixWtSite1old,
|
||||
epsilonOld2, alphaOld2, rmOld2, mixWtSite2old;
|
||||
|
||||
// Default constructor -- nullify everything.
|
||||
PairExp6ParamDataTypeKokkos<DeviceType>(void)
|
||||
: n(0), epsilon1(NULL), alpha1(NULL), rm1(NULL), mixWtSite1(NULL),
|
||||
epsilon2(NULL), alpha2(NULL), rm2(NULL), mixWtSite2(NULL),
|
||||
epsilonOld1(NULL), alphaOld1(NULL), rmOld1(NULL), mixWtSite1old(NULL),
|
||||
epsilonOld2(NULL), alphaOld2(NULL), rmOld2(NULL), mixWtSite2old(NULL)
|
||||
{}
|
||||
};
|
||||
|
||||
template<class DeviceType>
|
||||
struct PairExp6ParamDataTypeKokkosVect
|
||||
{
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
typename AT::t_float_1d epsilon, rm3, alpha, xMolei, epsilon_old, rm3_old,
|
||||
alpha_old, xMolei_old, fractionOFA, fraction1,
|
||||
fraction2, nMoleculesOFA, nMolecules1, nMolecules2,
|
||||
nTotal, fractionOFAold, fractionOld1, fractionOld2,
|
||||
nMoleculesOFAold, nMoleculesOld1, nMoleculesOld2,
|
||||
nTotalold;
|
||||
|
||||
// Default constructor -- nullify everything.
|
||||
PairExp6ParamDataTypeKokkosVect<DeviceType>(void)
|
||||
: epsilon(NULL), rm3(NULL), alpha(NULL), xMolei(NULL), epsilon_old(NULL), rm3_old(NULL),
|
||||
alpha_old(NULL), xMolei_old(NULL), fractionOFA(NULL), fraction1(NULL),
|
||||
fraction2(NULL), nMoleculesOFA(NULL), nMolecules1(NULL), nMolecules2(NULL),
|
||||
nTotal(NULL), fractionOFAold(NULL), fractionOld1(NULL), fractionOld2(NULL),
|
||||
nMoleculesOFAold(NULL), nMoleculesOld1(NULL), nMoleculesOld2(NULL),
|
||||
nTotalold(NULL)
|
||||
{}
|
||||
};
|
||||
|
||||
// Empty tag types used to select an operator() overload of
// PairExp6rxKokkos through a Kokkos execution policy.

struct TagPairExp6rxZeroMixingWeights {};
struct TagPairExp6rxgetMixingWeights {};

template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
struct TagPairExp6rxCompute {};

template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
struct TagPairExp6rxComputeNoAtomics {};

struct TagPairExp6rxCollapseDupViews {};
struct TagPairExp6rxZeroDupViews {};
|
||||
|
||||
template<class DeviceType>
|
||||
class PairExp6rxKokkos : public PairExp6rx {
|
||||
public:
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
typedef EV_FLOAT value_type;
|
||||
|
||||
PairExp6rxKokkos(class LAMMPS *);
|
||||
virtual ~PairExp6rxKokkos();
|
||||
void compute(int, int);
|
||||
void coeff(int, char **);
|
||||
void init_style();
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairExp6rxZeroMixingWeights, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairExp6rxgetMixingWeights, const int&) const;
|
||||
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairExp6rxCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int&, EV_FLOAT&) const;
|
||||
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairExp6rxCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int&) const;
|
||||
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairExp6rxComputeNoAtomics<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int&, EV_FLOAT&) const;
|
||||
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool Site1EqSite2, bool UseAtomics, bool OneType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void vectorized_operator(const int&, EV_FLOAT&) const;
|
||||
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairExp6rxComputeNoAtomics<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairExp6rxCollapseDupViews, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairExp6rxZeroDupViews, const int&) const;
|
||||
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
|
||||
const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
|
||||
const F_FLOAT &dely, const F_FLOAT &delz) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int sbmask(const int& j) const;
|
||||
|
||||
protected:
|
||||
int eflag,vflag;
|
||||
int nlocal,newton_pair,neighflag;
|
||||
double special_lj[4];
|
||||
int num_threads,ntypes;
|
||||
|
||||
typename AT::t_x_array_randomread x;
|
||||
typename AT::t_f_array f;
|
||||
typename AT::t_int_1d_randomread type;
|
||||
typename AT::t_efloat_1d uCG, uCGnew;
|
||||
typename AT::t_float_2d dvector;
|
||||
|
||||
typedef Kokkos::View<F_FLOAT**[3],Kokkos::LayoutRight,DeviceType> t_f_array_thread;
|
||||
typedef Kokkos::View<E_FLOAT**,Kokkos::LayoutRight,DeviceType> t_efloat_1d_thread;
|
||||
|
||||
t_f_array_thread t_f;
|
||||
t_efloat_1d_thread t_uCG, t_uCGnew;
|
||||
|
||||
DAT::tdual_efloat_1d k_eatom;
|
||||
DAT::tdual_virial_array k_vatom;
|
||||
typename AT::t_efloat_1d d_eatom;
|
||||
typename AT::t_virial_array d_vatom;
|
||||
|
||||
DAT::tdual_int_scalar k_error_flag;
|
||||
|
||||
typename AT::t_neighbors_2d d_neighbors;
|
||||
typename AT::t_int_1d_randomread d_ilist;
|
||||
typename AT::t_int_1d_randomread d_numneigh;
|
||||
|
||||
PairExp6ParamDataTypeKokkos<DeviceType> PairExp6ParamData;
|
||||
PairExp6ParamDataTypeKokkosVect<DeviceType> PairExp6ParamDataVect;
|
||||
|
||||
void allocate();
|
||||
DAT::tdual_int_1d k_mol2param; // mapping from molecule to parameters
|
||||
typename AT::t_int_1d_randomread d_mol2param;
|
||||
|
||||
typedef Kokkos::DualView<Param*,Kokkos::LayoutRight,DeviceType> tdual_param_1d;
|
||||
typedef typename tdual_param_1d::t_dev_const_randomread t_param_1d_randomread;
|
||||
|
||||
tdual_param_1d k_params; // parameter set for an I-J-K interaction
|
||||
t_param_1d_randomread d_params; // parameter set for an I-J-K interaction
|
||||
|
||||
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
|
||||
|
||||
void read_file(char *);
|
||||
void setup();
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void getMixingWeights(int, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &) const;
|
||||
|
||||
template <class ArrayT>
|
||||
void getMixingWeightsVect(const int, int, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void exponentScaling(double, double &, double &) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void polynomialScaling(double, double &, double &, double &) const;
|
||||
|
||||
double s_coeffAlpha[6],s_coeffEps[6],s_coeffRm[6];
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double func_rin(const double &) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double expValue(const double) const;
|
||||
|
||||
friend void pair_virial_fdotr_compute<PairExp6rxKokkos>(PairExp6rxKokkos*);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: alpha_ij is 6.0 in pair exp6
|
||||
|
||||
Self-explanatory
|
||||
|
||||
E: Illegal ... command
|
||||
|
||||
Self-explanatory. Check the input script syntax and compare to the
|
||||
documentation for the command. You can use -echo screen as a
|
||||
command-line option when running LAMMPS to see the offending line.
|
||||
|
||||
E: Incorrect args for pair coefficients
|
||||
|
||||
Self-explanatory. Check the input script or data file.
|
||||
|
||||
E: PairExp6rxKokkos requires a fix rx command
|
||||
|
||||
The fix rx command must come before the pair style command in the input file
|
||||
|
||||
E: There are no rx species specified
|
||||
|
||||
There must be at least one species specified through the fix rx command
|
||||
|
||||
E: Site1 name not recognized in pair coefficients
|
||||
|
||||
The site1 keyword does not match the species keywords specified through the fix rx command
|
||||
|
||||
E: All pair coeffs are not set
|
||||
|
||||
All pair coefficients must be set in the data file or by the
|
||||
pair_coeff command before running a simulation.
|
||||
|
||||
E: Cannot open exp6/rx potential file %s
|
||||
|
||||
Self-explanatory
|
||||
|
||||
E: Incorrect format in exp6/rx potential file
|
||||
|
||||
Self-explanatory
|
||||
|
||||
E: Illegal exp6/rx parameters. Rm and Epsilon must be greater than zero. Alpha cannot be negative.
|
||||
|
||||
Self-explanatory
|
||||
|
||||
E: Illegal exp6/rx parameters. Interaction potential does not exist.
|
||||
|
||||
Self-explanatory
|
||||
|
||||
E: Potential file has duplicate entry.
|
||||
|
||||
Self-explanatory
|
||||
|
||||
E: The number of molecules in CG particle is less than 10*DBL_EPSILON.
|
||||
|
||||
Self-explanatory. Check the species concentrations have been properly set
|
||||
and check the reaction kinetic solver parameters in fix rx to allow for
|
||||
sufficient accuracy.
|
||||
|
||||
|
||||
*/
|
|
@ -0,0 +1,159 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include "pair_hybrid_kokkos.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "force.h"
|
||||
#include "pair.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_request.h"
|
||||
#include "update.h"
|
||||
#include "comm.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "respa.h"
|
||||
#include "atom_masks.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Build the Kokkos flavor of pair style hybrid on top of PairHybrid.
PairHybridKokkos::PairHybridKokkos(LAMMPS *lmp) : PairHybrid(lmp)
{
  // both data masks start out empty for the hybrid wrapper itself

  datamask_modify = EMPTY_MASK;
  datamask_read = EMPTY_MASK;

  // prevent overlapping host/device computation, which isn't
  // yet supported by pair_hybrid_kokkos

  execution_space = Device;

  // keep a Kokkos-typed handle to the atom class for sync()/modified() calls

  atomKK = (AtomKokkos *) atom;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Destructor body is intentionally empty.
PairHybridKokkos::~PairHybridKokkos() {}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
call each sub-style's compute() or compute_outer() function
|
||||
accumulate sub-style global/peratom energy/virial in hybrid
|
||||
for global vflag = 1:
|
||||
each sub-style computes own virial[6]
|
||||
sum sub-style virial[6] to hybrid's virial[6]
|
||||
for global vflag = 2:
|
||||
call sub-style with adjusted vflag to prevent it calling
|
||||
virial_fdotr_compute()
|
||||
hybrid calls virial_fdotr_compute() on final accumulated f
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairHybridKokkos::compute(int eflag, int vflag)
|
||||
{
|
||||
int i,j,m,n;
|
||||
|
||||
// if no_virial_fdotr_compute is set and global component of
|
||||
// incoming vflag = 2, then
|
||||
// reset vflag as if global component were 1
|
||||
// necessary since one or more sub-styles cannot compute virial as F dot r
|
||||
|
||||
int neighflag = lmp->kokkos->neighflag;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (no_virial_fdotr_compute && vflag % 4 == 2) vflag = 1 + vflag/4 * 4;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = eflag_global = vflag_global =
|
||||
eflag_atom = vflag_atom = 0;
|
||||
|
||||
// check if global component of incoming vflag = 2
|
||||
// if so, reset vflag passed to substyle as if it were 0
|
||||
// necessary so substyle will not invoke virial_fdotr_compute()
|
||||
|
||||
int vflag_substyle;
|
||||
if (vflag % 4 == 2) vflag_substyle = vflag/4 * 4;
|
||||
else vflag_substyle = vflag;
|
||||
|
||||
double *saved_special = save_special();
|
||||
|
||||
// check if we are running with r-RESPA using the hybrid keyword
|
||||
|
||||
Respa *respa = NULL;
|
||||
respaflag = 0;
|
||||
if (strstr(update->integrate_style,"respa")) {
|
||||
respa = (Respa *) update->integrate;
|
||||
if (respa->nhybrid_styles > 0) respaflag = 1;
|
||||
}
|
||||
|
||||
for (m = 0; m < nstyles; m++) {
|
||||
|
||||
set_special(m);
|
||||
|
||||
if (!respaflag || (respaflag && respa->hybrid_compute[m])) {
|
||||
|
||||
// invoke compute() unless compute flag is turned off or
|
||||
// outerflag is set and sub-style has a compute_outer() method
|
||||
|
||||
if (styles[m]->compute_flag == 0) continue;
|
||||
atomKK->sync(styles[m]->execution_space,styles[m]->datamask_read);
|
||||
if (outerflag && styles[m]->respa_enable)
|
||||
styles[m]->compute_outer(eflag,vflag_substyle);
|
||||
else styles[m]->compute(eflag,vflag_substyle);
|
||||
atomKK->modified(styles[m]->execution_space,styles[m]->datamask_modify);
|
||||
}
|
||||
|
||||
restore_special(saved_special);
|
||||
|
||||
// jump to next sub-style if r-RESPA does not want global accumulated data
|
||||
|
||||
if (respaflag && !respa->tally_global) continue;
|
||||
|
||||
if (eflag_global) {
|
||||
eng_vdwl += styles[m]->eng_vdwl;
|
||||
eng_coul += styles[m]->eng_coul;
|
||||
}
|
||||
if (vflag_global) {
|
||||
for (n = 0; n < 6; n++) virial[n] += styles[m]->virial[n];
|
||||
}
|
||||
if (eflag_atom) {
|
||||
n = atom->nlocal;
|
||||
if (force->newton_pair) n += atom->nghost;
|
||||
double *eatom_substyle = styles[m]->eatom;
|
||||
for (i = 0; i < n; i++) eatom[i] += eatom_substyle[i];
|
||||
}
|
||||
if (vflag_atom) {
|
||||
n = atom->nlocal;
|
||||
if (force->newton_pair) n += atom->nghost;
|
||||
double **vatom_substyle = styles[m]->vatom;
|
||||
for (i = 0; i < n; i++)
|
||||
for (j = 0; j < 6; j++)
|
||||
vatom[i][j] += vatom_substyle[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
delete [] saved_special;
|
||||
|
||||
// perform virial_fdotr on device
|
||||
|
||||
atomKK->sync(Device,X_MASK|F_MASK);
|
||||
x = atomKK->k_x.view<LMPDeviceType>();
|
||||
f = atomKK->k_f.view<LMPDeviceType>();
|
||||
|
||||
if (vflag_fdotr)
|
||||
pair_virial_fdotr_compute(this);
|
||||
}
|
|
@ -0,0 +1,118 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(hybrid/kk,PairHybridKokkos)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_HYBRID_KOKKOS_H
|
||||
#define LMP_PAIR_HYBRID_KOKKOS_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include "pair_hybrid.h"
|
||||
#include "pair_kokkos.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairHybridKokkos : public PairHybrid {
|
||||
friend class FixGPU;
|
||||
friend class FixIntel;
|
||||
friend class FixOMP;
|
||||
friend class Force;
|
||||
friend class Respa;
|
||||
friend class Info;
|
||||
public:
|
||||
typedef LMPDeviceType device_type;
|
||||
|
||||
PairHybridKokkos(class LAMMPS *);
|
||||
virtual ~PairHybridKokkos();
|
||||
void compute(int, int);
|
||||
|
||||
private:
|
||||
DAT::t_x_array_randomread x;
|
||||
DAT::t_f_array f;
|
||||
friend void pair_virial_fdotr_compute<PairHybridKokkos>(PairHybridKokkos*);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Illegal ... command
|
||||
|
||||
Self-explanatory. Check the input script syntax and compare to the
|
||||
documentation for the command. You can use -echo screen as a
|
||||
command-line option when running LAMMPS to see the offending line.
|
||||
|
||||
E: Pair style hybrid cannot have hybrid as an argument
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Pair style hybrid cannot have none as an argument
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Incorrect args for pair coefficients
|
||||
|
||||
Self-explanatory. Check the input script or data file.
|
||||
|
||||
E: Pair coeff for hybrid has invalid style
|
||||
|
||||
Style in pair coeff must have been listed in pair_style command.
|
||||
|
||||
E: Pair hybrid sub-style is not used
|
||||
|
||||
No pair_coeff command used a sub-style specified in the pair_style
|
||||
command.
|
||||
|
||||
E: Pair_modify special setting for pair hybrid incompatible with global special_bonds setting
|
||||
|
||||
Cannot override a setting of 0.0 or 1.0 or change a setting between
|
||||
0.0 and 1.0.
|
||||
|
||||
E: All pair coeffs are not set
|
||||
|
||||
All pair coefficients must be set in the data file or by the
|
||||
pair_coeff command before running a simulation.
|
||||
|
||||
E: Invoked pair single on pair style none
|
||||
|
||||
A command (e.g. a dump) attempted to invoke the single() function on a
|
||||
pair style none, which is illegal. You are probably attempting to
|
||||
compute per-atom quantities with an undefined pair style.
|
||||
|
||||
E: Pair hybrid sub-style does not support single call
|
||||
|
||||
You are attempting to invoke a single() call on a pair style
|
||||
that doesn't support it.
|
||||
|
||||
E: Pair hybrid single calls do not support per sub-style special bond values
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Unknown pair_modify hybrid sub-style
|
||||
|
||||
The choice of sub-style is unknown.
|
||||
|
||||
E: Coulomb cutoffs of pair hybrid sub-styles do not match
|
||||
|
||||
If using a Kspace solver, all Coulomb cutoffs of long pair styles must
|
||||
be the same.
|
||||
|
||||
*/
|
|
@ -0,0 +1,107 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include "pair_hybrid_overlay_kokkos.h"
|
||||
#include "atom.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_request.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// All setup is delegated to the PairHybridKokkos constructor.
PairHybridOverlayKokkos::PairHybridOverlayKokkos(LAMMPS *lmp) :
  PairHybridKokkos(lmp)
{
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
set coeffs for one or more type pairs
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairHybridOverlayKokkos::coeff(int narg, char **arg)
|
||||
{
|
||||
if (narg < 3) error->all(FLERR,"Incorrect args for pair coefficients");
|
||||
if (!allocated) allocate();
|
||||
|
||||
int ilo,ihi,jlo,jhi;
|
||||
force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi);
|
||||
force->bounds(FLERR,arg[1],atom->ntypes,jlo,jhi);
|
||||
|
||||
// 3rd arg = pair sub-style name
|
||||
// 4th arg = pair sub-style index if name used multiple times
|
||||
// allow for "none" as valid sub-style name
|
||||
|
||||
int multflag;
|
||||
int m;
|
||||
|
||||
for (m = 0; m < nstyles; m++) {
|
||||
multflag = 0;
|
||||
if (strcmp(arg[2],keywords[m]) == 0) {
|
||||
if (multiple[m]) {
|
||||
multflag = 1;
|
||||
if (narg < 4) error->all(FLERR,"Incorrect args for pair coefficients");
|
||||
if (!isdigit(arg[3][0]))
|
||||
error->all(FLERR,"Incorrect args for pair coefficients");
|
||||
int index = force->inumeric(FLERR,arg[3]);
|
||||
if (index == multiple[m]) break;
|
||||
else continue;
|
||||
} else break;
|
||||
}
|
||||
}
|
||||
|
||||
int none = 0;
|
||||
if (m == nstyles) {
|
||||
if (strcmp(arg[2],"none") == 0) none = 1;
|
||||
else error->all(FLERR,"Pair coeff for hybrid has invalid style");
|
||||
}
|
||||
|
||||
// move 1st/2nd args to 2nd/3rd args
|
||||
// if multflag: move 1st/2nd args to 3rd/4th args
|
||||
// just copy ptrs, since arg[] points into original input line
|
||||
|
||||
arg[2+multflag] = arg[1];
|
||||
arg[1+multflag] = arg[0];
|
||||
|
||||
// invoke sub-style coeff() starting with 1st remaining arg
|
||||
|
||||
if (!none) styles[m]->coeff(narg-1-multflag,&arg[1+multflag]);
|
||||
|
||||
// set setflag and which type pairs map to which sub-style
|
||||
// if sub-style is none: set hybrid subflag, wipe out map
|
||||
// else: set hybrid setflag & map only if substyle setflag is set
|
||||
// if sub-style is new for type pair, add as multiple mapping
|
||||
// if sub-style exists for type pair, don't add, just update coeffs
|
||||
|
||||
int count = 0;
|
||||
for (int i = ilo; i <= ihi; i++) {
|
||||
for (int j = MAX(jlo,i); j <= jhi; j++) {
|
||||
if (none) {
|
||||
setflag[i][j] = 1;
|
||||
nmap[i][j] = 0;
|
||||
count++;
|
||||
} else if (styles[m]->setflag[i][j]) {
|
||||
int k;
|
||||
for (k = 0; k < nmap[i][j]; k++)
|
||||
if (map[i][j][k] == m) break;
|
||||
if (k == nmap[i][j]) map[i][j][nmap[i][j]++] = m;
|
||||
setflag[i][j] = 1;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
|
||||
}
|
|
@ -11,27 +11,24 @@
|
|||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
NPairStyle(halffull/newton/ssa,
|
||||
NPairHalffullNewtonSSA,
|
||||
NP_HALF_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_NEWTON |
|
||||
NP_ORTHO | NP_TRI | NP_SSA)
|
||||
PairStyle(hybrid/overlay/kk,PairHybridOverlayKokkos)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_HALFFULL_NEWTON_SSA_H
|
||||
#define LMP_NPAIR_HALFFULL_NEWTON_SSA_H
|
||||
#ifndef LMP_PAIR_HYBRID_OVERLAY_KOKKOS_H
|
||||
#define LMP_PAIR_HYBRID_OVERLAY_KOKKOS_H
|
||||
|
||||
#include "npair.h"
|
||||
#include "pair_hybrid_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class NPairHalffullNewtonSSA : public NPair {
|
||||
class PairHybridOverlayKokkos : public PairHybridKokkos {
|
||||
public:
|
||||
NPairHalffullNewtonSSA(class LAMMPS *);
|
||||
~NPairHalffullNewtonSSA() {}
|
||||
void build(class NeighList *);
|
||||
PairHybridOverlayKokkos(class LAMMPS *);
|
||||
virtual ~PairHybridOverlayKokkos() {}
|
||||
void coeff(int, char **);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -41,4 +38,12 @@ class NPairHalffullNewtonSSA : public NPair {
|
|||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Incorrect args for pair coefficients
|
||||
|
||||
Self-explanatory. Check the input script or data file.
|
||||
|
||||
E: Pair coeff for hybrid has invalid style
|
||||
|
||||
Style in pair coeff must have been listed in pair_style command.
|
||||
|
||||
*/
|
|
@ -0,0 +1,998 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------------------------
|
||||
Contributing authors:
|
||||
Stan Moore (Sandia)
|
||||
|
||||
Please cite the related publications:
|
||||
J.D. Moore, B.C. Barnes, S. Izvekov, M. Lisal, M.S. Sellers, D.E. Taylor & J.K. Brennan
|
||||
"A coarse-grain force field for RDX: Density dependent and energy conserving"
|
||||
The Journal of Chemical Physics, 2016, 144, 104501.
|
||||
------------------------------------------------------------------------------------------- */
|
||||
|
||||
#include <mpi.h>
|
||||
#include <math.h>
|
||||
#include "math_const.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "pair_multi_lucy_rx_kokkos.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "force.h"
|
||||
#include "comm.h"
|
||||
#include "neigh_list.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "citeme.h"
|
||||
#include "modify.h"
|
||||
#include "fix.h"
|
||||
#include "atom_masks.h"
|
||||
#include "neigh_request.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
enum{NONE,RLINEAR,RSQ};
|
||||
|
||||
#define MAXLINE 1024
|
||||
|
||||
#ifdef DBL_EPSILON
|
||||
#define MY_EPSILON (10.0*DBL_EPSILON)
|
||||
#else
|
||||
#define MY_EPSILON (10.0*2.220446049250313e-16)
|
||||
#endif
|
||||
|
||||
#define oneFluidParameter (-1)
|
||||
#define isOneFluid(_site) ( (_site) == oneFluidParameter )
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
PairMultiLucyRXKokkos<DeviceType>::PairMultiLucyRXKokkos(LAMMPS *lmp) : PairMultiLucyRX(lmp)
|
||||
{
|
||||
respa_enable = 0;
|
||||
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
datamask_read = EMPTY_MASK;
|
||||
datamask_modify = EMPTY_MASK;
|
||||
|
||||
update_table = 1;
|
||||
h_table = new TableHost();
|
||||
d_table = new TableDevice();
|
||||
|
||||
k_error_flag = DAT::tdual_int_scalar("pair:error_flag");
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
PairMultiLucyRXKokkos<DeviceType>::~PairMultiLucyRXKokkos()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
memory->destroy_kokkos(k_eatom,eatom);
|
||||
memory->destroy_kokkos(k_vatom,vatom);
|
||||
|
||||
memory->destroy_kokkos(k_cutsq,cutsq);
|
||||
|
||||
delete h_table;
|
||||
delete d_table;
|
||||
tabindex = NULL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairMultiLucyRXKokkos<DeviceType>::init_style()
|
||||
{
|
||||
PairMultiLucyRX::init_style();
|
||||
|
||||
// irequest = neigh request made by parent class
|
||||
|
||||
neighflag = lmp->kokkos->neighflag;
|
||||
int irequest = neighbor->nrequest - 1;
|
||||
|
||||
neighbor->requests[irequest]->
|
||||
kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
|
||||
!Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
|
||||
neighbor->requests[irequest]->
|
||||
kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
|
||||
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with multi/lucy/rx/kk");
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairMultiLucyRXKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
{
|
||||
copymode = 1;
|
||||
|
||||
if (update_table)
|
||||
create_kokkos_tables();
|
||||
|
||||
if (tabstyle == LOOKUP)
|
||||
compute_style<LOOKUP>(eflag_in,vflag_in);
|
||||
else if(tabstyle == LINEAR)
|
||||
compute_style<LINEAR>(eflag_in,vflag_in);
|
||||
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<int TABSTYLE>
|
||||
void PairMultiLucyRXKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
|
||||
{
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
if (eflag || vflag) ev_setup(eflag,vflag,0);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
|
||||
// reallocate per-atom arrays if necessary
|
||||
|
||||
if (eflag_atom) {
|
||||
memory->destroy_kokkos(k_eatom,eatom);
|
||||
memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
|
||||
d_eatom = k_eatom.template view<DeviceType>();
|
||||
}
|
||||
if (vflag_atom) {
|
||||
memory->destroy_kokkos(k_vatom,vatom);
|
||||
memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
|
||||
d_vatom = k_vatom.template view<DeviceType>();
|
||||
}
|
||||
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
f = atomKK->k_f.view<DeviceType>();
|
||||
type = atomKK->k_type.view<DeviceType>();
|
||||
rho = atomKK->k_rho.view<DeviceType>();
|
||||
uCG = atomKK->k_uCG.view<DeviceType>();
|
||||
uCGnew = atomKK->k_uCGnew.view<DeviceType>();
|
||||
dvector = atomKK->k_dvector.view<DeviceType>();
|
||||
|
||||
atomKK->sync(execution_space,X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK | DPDRHO_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK);
|
||||
k_cutsq.template sync<DeviceType>();
|
||||
|
||||
nlocal = atom->nlocal;
|
||||
int nghost = atom->nghost;
|
||||
int newton_pair = force->newton_pair;
|
||||
|
||||
{
|
||||
const int ntotal = nlocal + nghost;
|
||||
if (ntotal > d_mixWtSite1.dimension_0()) {
|
||||
d_mixWtSite1old = typename AT::t_float_1d("PairMultiLucyRX::mixWtSite1old",ntotal);
|
||||
d_mixWtSite2old = typename AT::t_float_1d("PairMultiLucyRX::mixWtSite2old",ntotal);
|
||||
d_mixWtSite1 = typename AT::t_float_1d("PairMultiLucyRX::mixWtSite1",ntotal);
|
||||
d_mixWtSite2 = typename AT::t_float_1d("PairMultiLucyRX::mixWtSite2",ntotal);
|
||||
}
|
||||
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXgetMixingWeights>(0,ntotal),*this);
|
||||
}
|
||||
|
||||
const int inum = list->inum;
|
||||
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
|
||||
d_numneigh = k_list->d_numneigh;
|
||||
d_neighbors = k_list->d_neighbors;
|
||||
d_ilist = k_list->d_ilist;
|
||||
|
||||
computeLocalDensity();
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
EV_FLOAT ev;
|
||||
|
||||
if (neighflag == HALF) {
|
||||
if (newton_pair) {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALF,1,1,TABSTYLE> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALF,1,0,TABSTYLE> >(0,inum),*this);
|
||||
} else {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALF,0,1,TABSTYLE> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALF,0,0,TABSTYLE> >(0,inum),*this);
|
||||
}
|
||||
} else if (neighflag == HALFTHREAD) {
|
||||
if (newton_pair) {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALFTHREAD,1,1,TABSTYLE> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALFTHREAD,1,0,TABSTYLE> >(0,inum),*this);
|
||||
} else {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALFTHREAD,0,1,TABSTYLE> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALFTHREAD,0,0,TABSTYLE> >(0,inum),*this);
|
||||
}
|
||||
} else if (neighflag == FULL) {
|
||||
if (newton_pair) {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<FULL,1,1,TABSTYLE> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<FULL,1,0,TABSTYLE> >(0,inum),*this);
|
||||
} else {
|
||||
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<FULL,0,1,TABSTYLE> >(0,inum),*this,ev);
|
||||
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<FULL,0,0,TABSTYLE> >(0,inum),*this);
|
||||
}
|
||||
}
|
||||
|
||||
if (evflag) atomKK->modified(execution_space,F_MASK | ENERGY_MASK | VIRIAL_MASK | UCG_MASK | UCGNEW_MASK);
|
||||
else atomKK->modified(execution_space,F_MASK | UCG_MASK | UCGNEW_MASK);
|
||||
|
||||
k_error_flag.template modify<DeviceType>();
|
||||
k_error_flag.template sync<LMPHostType>();
|
||||
if (k_error_flag.h_view() == 1)
|
||||
error->one(FLERR,"Density < table inner cutoff");
|
||||
else if (k_error_flag.h_view() == 2)
|
||||
error->one(FLERR,"Density > table outer cutoff");
|
||||
else if (k_error_flag.h_view() == 3)
|
||||
error->one(FLERR,"Only LOOKUP and LINEAR table styles have been implemented for pair multi/lucy/rx");
|
||||
|
||||
if (eflag_global) eng_vdwl += ev.evdwl;
|
||||
if (vflag_global) {
|
||||
virial[0] += ev.v[0];
|
||||
virial[1] += ev.v[1];
|
||||
virial[2] += ev.v[2];
|
||||
virial[3] += ev.v[3];
|
||||
virial[4] += ev.v[4];
|
||||
virial[5] += ev.v[5];
|
||||
}
|
||||
|
||||
if (vflag_fdotr) pair_virial_fdotr_compute(this);
|
||||
|
||||
if (eflag_atom) {
|
||||
k_eatom.template modify<DeviceType>();
|
||||
k_eatom.template sync<LMPHostType>();
|
||||
}
|
||||
|
||||
if (vflag_atom) {
|
||||
k_vatom.template modify<DeviceType>();
|
||||
k_vatom.template sync<LMPHostType>();
|
||||
}
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXgetMixingWeights, const int &i) const {
|
||||
getMixingWeights(i, d_mixWtSite1old[i], d_mixWtSite2old[i], d_mixWtSite1[i], d_mixWtSite2[i]);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int TABSTYLE>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>, const int &ii, EV_FLOAT& ev) const {
|
||||
|
||||
// The f array is atomic for Half/Thread neighbor style
|
||||
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
|
||||
|
||||
int i,jj,jnum,itype,jtype,itable;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwlOld,fpair;
|
||||
double rsq;
|
||||
|
||||
double mixWtSite1old_i,mixWtSite1old_j;
|
||||
double mixWtSite2old_i,mixWtSite2old_j;
|
||||
double mixWtSite1_i;
|
||||
|
||||
double pi = MathConst::MY_PI;
|
||||
double A_i, A_j;
|
||||
double fraction_i,fraction_j;
|
||||
int jtable;
|
||||
|
||||
int tlm1 = tablength - 1;
|
||||
|
||||
i = d_ilist[ii];
|
||||
xtmp = x(i,0);
|
||||
ytmp = x(i,1);
|
||||
ztmp = x(i,2);
|
||||
itype = type[i];
|
||||
jnum = d_numneigh[i];
|
||||
|
||||
double fx_i = 0.0;
|
||||
double fy_i = 0.0;
|
||||
double fz_i = 0.0;
|
||||
|
||||
mixWtSite1old_i = d_mixWtSite1old[i];
|
||||
mixWtSite2old_i = d_mixWtSite2old[i];
|
||||
mixWtSite1_i = d_mixWtSite1[i];
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
int j = d_neighbors(i,jj);
|
||||
j &= NEIGHMASK;
|
||||
|
||||
delx = xtmp - x(j,0);
|
||||
dely = ytmp - x(j,1);
|
||||
delz = ztmp - x(j,2);
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
jtype = type[j];
|
||||
|
||||
if (rsq < d_cutsq(itype,jtype)) { // optimize
|
||||
fpair = 0.0;
|
||||
|
||||
mixWtSite1old_j = d_mixWtSite1old[j];
|
||||
mixWtSite2old_j = d_mixWtSite2old[j];
|
||||
|
||||
//tb = &tables[tabindex[itype][jtype]];
|
||||
const int tidx = d_table_const.tabindex(itype,jtype);
|
||||
|
||||
//if (rho[i]*rho[i] < tb->innersq || rho[j]*rho[j] < tb->innersq){
|
||||
if (rho[i]*rho[i] < d_table_const.innersq(tidx) || rho[j]*rho[j] < d_table_const.innersq(tidx)){
|
||||
k_error_flag.template view<DeviceType>()() = 1;
|
||||
}
|
||||
|
||||
if (TABSTYLE == LOOKUP) {
|
||||
//itable = static_cast<int> (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta);
|
||||
itable = static_cast<int> (((rho[i]*rho[i]) - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
|
||||
//jtable = static_cast<int> (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta);
|
||||
jtable = static_cast<int> (((rho[j]*rho[j]) - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
|
||||
if (itable >= tlm1 || jtable >= tlm1){
|
||||
k_error_flag.template view<DeviceType>()() = 2;
|
||||
}
|
||||
//A_i = tb->f[itable];
|
||||
A_i = d_table_const.f(tidx,itable);
|
||||
//A_j = tb->f[jtable];
|
||||
A_j = d_table_const.f(tidx,jtable);
|
||||
|
||||
const double rfactor = 1.0-sqrt(rsq/d_cutsq(itype,jtype));
|
||||
fpair = 0.5*(A_i + A_j)*(4.0-3.0*rfactor)*rfactor*rfactor*rfactor;
|
||||
fpair /= sqrt(rsq);
|
||||
|
||||
} else if (TABSTYLE == LINEAR) {
|
||||
|
||||
//itable = static_cast<int> ((rho[i]*rho[i] - tb->innersq) * tb->invdelta);
|
||||
itable = static_cast<int> ((rho[i]*rho[i] - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
|
||||
//jtable = static_cast<int> (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta);
|
||||
jtable = static_cast<int> ((rho[j]*rho[j] - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
|
||||
if (itable >= tlm1 || jtable >= tlm1){
|
||||
k_error_flag.template view<DeviceType>()() = 2;
|
||||
}
|
||||
if(itable<0) itable=0;
|
||||
if(itable>=tlm1) itable=tlm1;
|
||||
if(jtable<0) jtable=0;
|
||||
if(jtable>=tlm1)jtable=tlm1;
|
||||
|
||||
//fraction_i = (((rho[i]*rho[i]) - tb->rsq[itable]) * tb->invdelta);
|
||||
fraction_i = (((rho[i]*rho[i]) - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx));
|
||||
//fraction_j = (((rho[j]*rho[j]) - tb->rsq[jtable]) * tb->invdelta);
|
||||
fraction_j = (((rho[j]*rho[j]) - d_table_const.rsq(tidx,jtable)) * d_table_const.invdelta(tidx));
|
||||
if(itable==0) fraction_i=0.0;
|
||||
if(itable==tlm1) fraction_i=0.0;
|
||||
if(jtable==0) fraction_j=0.0;
|
||||
if(jtable==tlm1) fraction_j=0.0;
|
||||
|
||||
//A_i = tb->f[itable] + fraction_i*tb->df[itable];
|
||||
A_i = d_table_const.f(tidx,itable) + fraction_i*d_table_const.df(tidx,itable);
|
||||
//A_j = tb->f[jtable] + fraction_j*tb->df[jtable];
|
||||
A_j = d_table_const.f(tidx,jtable) + fraction_j*d_table_const.df(tidx,jtable);
|
||||
|
||||
const double rfactor = 1.0-sqrt(rsq/d_cutsq(itype,jtype));
|
||||
fpair = 0.5*(A_i + A_j)*(4.0-3.0*rfactor)*rfactor*rfactor*rfactor;
|
||||
fpair /= sqrt(rsq);
|
||||
|
||||
} else k_error_flag.template view<DeviceType>()() = 3;
|
||||
|
||||
if (isite1 == isite2) fpair = sqrt(mixWtSite1old_i*mixWtSite2old_j)*fpair;
|
||||
else fpair = (sqrt(mixWtSite1old_i*mixWtSite2old_j) + sqrt(mixWtSite2old_i*mixWtSite1old_j))*fpair;
|
||||
|
||||
fx_i += delx*fpair;
|
||||
fy_i += dely*fpair;
|
||||
fz_i += delz*fpair;
|
||||
if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) {
|
||||
a_f(j,0) -= delx*fpair;
|
||||
a_f(j,1) -= dely*fpair;
|
||||
a_f(j,2) -= delz*fpair;
|
||||
}
|
||||
//if (evflag) ev_tally(i,j,nlocal,newton_pair,0.0,0.0,fpair,delx,dely,delz);
|
||||
if (EVFLAG) this->template ev_tally<NEIGHFLAG,NEWTON_PAIR>(ev,i,j,0.0,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
|
||||
a_f(i,0) += fx_i;
|
||||
a_f(i,1) += fy_i;
|
||||
a_f(i,2) += fz_i;
|
||||
|
||||
//tb = &tables[tabindex[itype][itype]];
|
||||
const int tidx = d_table_const.tabindex(itype,itype);
|
||||
//itable = static_cast<int> (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta);
|
||||
itable = static_cast<int> (((rho[i]*rho[i]) - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
|
||||
//if (TABSTYLE == LOOKUP) evdwl = tb->e[itable];
|
||||
if (TABSTYLE == LOOKUP) {
|
||||
evdwl = d_table_const.e(tidx,itable);
|
||||
} else if (TABSTYLE == LINEAR) {
|
||||
if (itable >= tlm1){
|
||||
k_error_flag.template view<DeviceType>()() = 2;
|
||||
}
|
||||
if(itable==0) fraction_i=0.0;
|
||||
//else fraction_i = (((rho[i]*rho[i]) - tb->rsq[itable]) * tb->invdelta);
|
||||
else fraction_i = (((rho[i]*rho[i]) - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx));
|
||||
//evdwl = tb->e[itable] + fraction_i*tb->de[itable];
|
||||
evdwl = d_table_const.e(tidx,itable) + fraction_i*d_table_const.de(tidx,itable);
|
||||
} else k_error_flag.template view<DeviceType>()() = 3;
|
||||
|
||||
evdwl *=(pi*d_cutsq(itype,itype)*d_cutsq(itype,itype))/84.0;
|
||||
evdwlOld = mixWtSite1old_i*evdwl;
|
||||
evdwl = mixWtSite1_i*evdwl;
|
||||
|
||||
uCG[i] += evdwlOld;
|
||||
uCGnew[i] += evdwl;
|
||||
|
||||
evdwl = evdwlOld;
|
||||
|
||||
//if (evflag) ev_tally(0,0,nlocal,newton_pair,evdwl,0.0,0.0,0.0,0.0,0.0);
|
||||
if (EVFLAG)
|
||||
ev.evdwl += ((/*FIXME??? (NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && */ NEWTON_PAIR)?1.0:0.5)*evdwl;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int TABSTYLE>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>, const int &ii) const {
|
||||
EV_FLOAT ev;
|
||||
this->template operator()<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>(), ii, ev);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairMultiLucyRXKokkos<DeviceType>::computeLocalDensity()
|
||||
{
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
type = atomKK->k_type.view<DeviceType>();
|
||||
rho = atomKK->k_rho.view<DeviceType>();
|
||||
h_rho = atomKK->k_rho.h_view;
|
||||
nlocal = atom->nlocal;
|
||||
|
||||
atomKK->sync(execution_space,X_MASK | TYPE_MASK | DPDRHO_MASK);
|
||||
|
||||
const int inum = list->inum;
|
||||
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
|
||||
d_numneigh = k_list->d_numneigh;
|
||||
d_neighbors = k_list->d_neighbors;
|
||||
d_ilist = k_list->d_ilist;
|
||||
|
||||
const double pi = MathConst::MY_PI;
|
||||
|
||||
const bool newton_pair = force->newton_pair;
|
||||
const bool one_type = (atom->ntypes == 1);
|
||||
|
||||
// Special cut-off values for when there's only one type.
|
||||
cutsq_type11 = cutsq[1][1];
|
||||
rcut_type11 = sqrt(cutsq_type11);
|
||||
factor_type11 = 84.0/(5.0*pi*rcut_type11*rcut_type11*rcut_type11);
|
||||
|
||||
// zero out density
|
||||
int m = nlocal;
|
||||
if (newton_pair) m += atom->nghost;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXZero>(0,m),*this);
|
||||
|
||||
// rho = density at each atom
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
if (neighflag == HALF) {
|
||||
if (newton_pair)
|
||||
if (one_type)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALF,1,true> >(0,inum),*this);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALF,1,false> >(0,inum),*this);
|
||||
else
|
||||
if (one_type)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALF,0,true> >(0,inum),*this);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALF,0,false> >(0,inum),*this);
|
||||
} else if (neighflag == HALFTHREAD) {
|
||||
if (newton_pair)
|
||||
if (one_type)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALFTHREAD,1,true> >(0,inum),*this);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALFTHREAD,1,false> >(0,inum),*this);
|
||||
else
|
||||
if (one_type)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALFTHREAD,0,true> >(0,inum),*this);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALFTHREAD,0,false> >(0,inum),*this);
|
||||
} else if (neighflag == FULL) {
|
||||
if (newton_pair)
|
||||
if (one_type)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<FULL,1,true> >(0,inum),*this);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<FULL,1,false> >(0,inum),*this);
|
||||
else
|
||||
if (one_type)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<FULL,0,true> >(0,inum),*this);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<FULL,0,false> >(0,inum),*this);
|
||||
}
|
||||
|
||||
atomKK->modified(execution_space,DPDRHO_MASK);
|
||||
|
||||
// communicate and sum densities (on the host)
|
||||
|
||||
if (newton_pair)
|
||||
comm->reverse_comm_pair(this);
|
||||
|
||||
comm->forward_comm_pair(this);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXZero, const int &i) const {
|
||||
rho[i] = 0.0;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, bool ONE_TYPE>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXComputeLocalDensity<NEIGHFLAG,NEWTON_PAIR,ONE_TYPE>, const int &ii) const {
|
||||
|
||||
|
||||
// The rho array is atomic for Half/Thread neighbor style
|
||||
Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_rho = rho;
|
||||
|
||||
const int i = d_ilist[ii];
|
||||
|
||||
const double xtmp = x(i,0);
|
||||
const double ytmp = x(i,1);
|
||||
const double ztmp = x(i,2);
|
||||
|
||||
double rho_i_contrib = 0.0;
|
||||
|
||||
const int itype = type[i];
|
||||
const int jnum = d_numneigh[i];
|
||||
|
||||
const double pi = MathConst::MY_PI;
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++){
|
||||
const int j = (d_neighbors(i,jj) & NEIGHMASK);
|
||||
const int jtype = type[j];
|
||||
|
||||
const double delx = xtmp - x(j,0);
|
||||
const double dely = ytmp - x(j,1);
|
||||
const double delz = ztmp - x(j,2);
|
||||
const double rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (ONE_TYPE) {
|
||||
if (rsq < cutsq_type11) {
|
||||
const double rcut = rcut_type11;
|
||||
const double r_over_rcut = sqrt(rsq) / rcut;
|
||||
const double tmpFactor = 1.0 - r_over_rcut;
|
||||
const double tmpFactor4 = tmpFactor*tmpFactor*tmpFactor*tmpFactor;
|
||||
const double factor = factor_type11*(1.0 + 1.5*r_over_rcut)*tmpFactor4;
|
||||
rho_i_contrib += factor;
|
||||
if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal))
|
||||
a_rho[j] += factor;
|
||||
}
|
||||
} else if (rsq < d_cutsq(itype,jtype)) {
|
||||
const double rcut = sqrt(d_cutsq(itype,jtype));
|
||||
const double tmpFactor = 1.0-sqrt(rsq)/rcut;
|
||||
const double tmpFactor4 = tmpFactor*tmpFactor*tmpFactor*tmpFactor;
|
||||
const double factor = (84.0/(5.0*pi*rcut*rcut*rcut))*(1.0+3.0*sqrt(rsq)/(2.0*rcut))*tmpFactor4;
|
||||
rho_i_contrib += factor;
|
||||
if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal))
|
||||
a_rho[j] += factor;
|
||||
}
|
||||
}
|
||||
|
||||
a_rho[i] += rho_i_contrib;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairMultiLucyRXKokkos<DeviceType>::getMixingWeights(int id, double &mixWtSite1old, double &mixWtSite2old, double &mixWtSite1, double &mixWtSite2) const
|
||||
{
|
||||
double fractionOFAold, fractionOFA;
|
||||
double fractionOld1, fraction1;
|
||||
double fractionOld2, fraction2;
|
||||
double nMoleculesOFAold, nMoleculesOFA;
|
||||
double nMoleculesOld1, nMolecules1;
|
||||
double nMoleculesOld2, nMolecules2;
|
||||
double nTotal, nTotalOld;
|
||||
|
||||
|
||||
nTotal = 0.0;
|
||||
nTotalOld = 0.0;
|
||||
for (int ispecies = 0; ispecies < nspecies; ispecies++){
|
||||
nTotal += dvector(ispecies,id);
|
||||
nTotalOld += dvector(ispecies+nspecies,id);
|
||||
}
|
||||
|
||||
if (isOneFluid(isite1) == false){
|
||||
nMoleculesOld1 = dvector(isite1+nspecies,id);
|
||||
nMolecules1 = dvector(isite1,id);
|
||||
fractionOld1 = nMoleculesOld1/nTotalOld;
|
||||
fraction1 = nMolecules1/nTotal;
|
||||
}
|
||||
if (isOneFluid(isite2) == false){
|
||||
nMoleculesOld2 = dvector(isite2+nspecies,id);
|
||||
nMolecules2 = dvector(isite2,id);
|
||||
fractionOld2 = nMoleculesOld2/nTotalOld;
|
||||
fraction2 = nMolecules2/nTotal;
|
||||
}
|
||||
|
||||
if (isOneFluid(isite1) || isOneFluid(isite2)){
|
||||
nMoleculesOFAold = 0.0;
|
||||
nMoleculesOFA = 0.0;
|
||||
fractionOFAold = 0.0;
|
||||
fractionOFA = 0.0;
|
||||
|
||||
for (int ispecies = 0; ispecies < nspecies; ispecies++){
|
||||
if (isite1 == ispecies || isite2 == ispecies) continue;
|
||||
nMoleculesOFAold += dvector(ispecies+nspecies,id);
|
||||
nMoleculesOFA += dvector(ispecies,id);
|
||||
fractionOFAold += dvector(ispecies+nspecies,id) / nTotalOld;
|
||||
fractionOFA += dvector(ispecies,id) / nTotal;
|
||||
}
|
||||
if (isOneFluid(isite1)){
|
||||
nMoleculesOld1 = 1.0-(nTotalOld-nMoleculesOFAold);
|
||||
nMolecules1 = 1.0-(nTotal-nMoleculesOFA);
|
||||
fractionOld1 = fractionOFAold;
|
||||
fraction1 = fractionOFA;
|
||||
}
|
||||
if (isOneFluid(isite2)){
|
||||
nMoleculesOld2 = 1.0-(nTotalOld-nMoleculesOFAold);
|
||||
nMolecules2 = 1.0-(nTotal-nMoleculesOFA);
|
||||
fractionOld2 = fractionOFAold;
|
||||
fraction2 = fractionOFA;
|
||||
}
|
||||
}
|
||||
|
||||
if(fractionalWeighting){
|
||||
mixWtSite1old = fractionOld1;
|
||||
mixWtSite1 = fraction1;
|
||||
mixWtSite2old = fractionOld2;
|
||||
mixWtSite2 = fraction2;
|
||||
} else {
|
||||
mixWtSite1old = nMoleculesOld1;
|
||||
mixWtSite1 = nMolecules1;
|
||||
mixWtSite2old = nMoleculesOld2;
|
||||
mixWtSite2 = nMolecules2;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int PairMultiLucyRXKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, int iswap_in, DAT::tdual_xfloat_1d &buf,
|
||||
int pbc_flag, int *pbc)
|
||||
{
|
||||
atomKK->sync(execution_space,DPDRHO_MASK);
|
||||
|
||||
d_sendlist = k_sendlist.view<DeviceType>();
|
||||
iswap = iswap_in;
|
||||
v_buf = buf.view<DeviceType>();
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairMultiLucyRXPackForwardComm>(0,n),*this);
|
||||
return n;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXPackForwardComm, const int &i) const {
|
||||
int j = d_sendlist(iswap, i);
|
||||
v_buf[i] = rho[j];
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairMultiLucyRXKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first_in, DAT::tdual_xfloat_1d &buf)
|
||||
{
|
||||
first = first_in;
|
||||
v_buf = buf.view<DeviceType>();
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairMultiLucyRXUnpackForwardComm>(0,n),*this);
|
||||
|
||||
atomKK->modified(execution_space,DPDRHO_MASK);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXUnpackForwardComm, const int &i) const {
|
||||
rho[i + first] = v_buf[i];
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int PairMultiLucyRXKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc)
|
||||
{
|
||||
int i,j,m;
|
||||
|
||||
atomKK->sync(Host,DPDRHO_MASK);
|
||||
|
||||
m = 0;
|
||||
for (i = 0; i < n; i++) {
|
||||
j = list[i];
|
||||
buf[m++] = h_rho[j];
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairMultiLucyRXKokkos<DeviceType>::unpack_forward_comm(int n, int first, double *buf)
|
||||
{
|
||||
int i,m,last;
|
||||
|
||||
m = 0;
|
||||
last = first + n;
|
||||
for (i = first; i < last; i++) h_rho[i] = buf[m++];
|
||||
|
||||
atomKK->modified(Host,DPDRHO_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int PairMultiLucyRXKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *buf)
|
||||
{
|
||||
int i,m,last;
|
||||
|
||||
atomKK->sync(Host,DPDRHO_MASK);
|
||||
|
||||
m = 0;
|
||||
last = first + n;
|
||||
for (i = first; i < last; i++) buf[m++] = h_rho[i];
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairMultiLucyRXKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double *buf)
|
||||
{
|
||||
int i,j,m;
|
||||
|
||||
m = 0;
|
||||
for (i = 0; i < n; i++) {
|
||||
j = list[i];
|
||||
h_rho[j] += buf[m++];
|
||||
}
|
||||
|
||||
atomKK->modified(Host,DPDRHO_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairMultiLucyRXKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j,
|
||||
const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
|
||||
const F_FLOAT &dely, const F_FLOAT &delz) const
|
||||
{
|
||||
const int EFLAG = eflag;
|
||||
const int VFLAG = vflag_either;
|
||||
|
||||
// The eatom and vatom arrays are atomic for Half/Thread neighbor style
|
||||
Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
|
||||
Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
|
||||
|
||||
if (EFLAG) {
|
||||
if (eflag_atom) {
|
||||
const E_FLOAT epairhalf = 0.5 * epair;
|
||||
if (NEIGHFLAG!=FULL) {
|
||||
if (NEWTON_PAIR || i < nlocal) v_eatom[i] += epairhalf;
|
||||
if (NEWTON_PAIR || j < nlocal) v_eatom[j] += epairhalf;
|
||||
} else {
|
||||
v_eatom[i] += epairhalf;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (VFLAG) {
|
||||
const E_FLOAT v0 = delx*delx*fpair;
|
||||
const E_FLOAT v1 = dely*dely*fpair;
|
||||
const E_FLOAT v2 = delz*delz*fpair;
|
||||
const E_FLOAT v3 = delx*dely*fpair;
|
||||
const E_FLOAT v4 = delx*delz*fpair;
|
||||
const E_FLOAT v5 = dely*delz*fpair;
|
||||
|
||||
if (vflag_global) {
|
||||
if (NEIGHFLAG!=FULL) {
|
||||
if (NEWTON_PAIR || i < nlocal) {
|
||||
ev.v[0] += 0.5*v0;
|
||||
ev.v[1] += 0.5*v1;
|
||||
ev.v[2] += 0.5*v2;
|
||||
ev.v[3] += 0.5*v3;
|
||||
ev.v[4] += 0.5*v4;
|
||||
ev.v[5] += 0.5*v5;
|
||||
}
|
||||
if (NEWTON_PAIR || j < nlocal) {
|
||||
ev.v[0] += 0.5*v0;
|
||||
ev.v[1] += 0.5*v1;
|
||||
ev.v[2] += 0.5*v2;
|
||||
ev.v[3] += 0.5*v3;
|
||||
ev.v[4] += 0.5*v4;
|
||||
ev.v[5] += 0.5*v5;
|
||||
}
|
||||
} else {
|
||||
ev.v[0] += 0.5*v0;
|
||||
ev.v[1] += 0.5*v1;
|
||||
ev.v[2] += 0.5*v2;
|
||||
ev.v[3] += 0.5*v3;
|
||||
ev.v[4] += 0.5*v4;
|
||||
ev.v[5] += 0.5*v5;
|
||||
}
|
||||
}
|
||||
|
||||
if (vflag_atom) {
|
||||
if (NEIGHFLAG!=FULL) {
|
||||
if (NEWTON_PAIR || i < nlocal) {
|
||||
v_vatom(i,0) += 0.5*v0;
|
||||
v_vatom(i,1) += 0.5*v1;
|
||||
v_vatom(i,2) += 0.5*v2;
|
||||
v_vatom(i,3) += 0.5*v3;
|
||||
v_vatom(i,4) += 0.5*v4;
|
||||
v_vatom(i,5) += 0.5*v5;
|
||||
}
|
||||
if (NEWTON_PAIR || j < nlocal) {
|
||||
v_vatom(j,0) += 0.5*v0;
|
||||
v_vatom(j,1) += 0.5*v1;
|
||||
v_vatom(j,2) += 0.5*v2;
|
||||
v_vatom(j,3) += 0.5*v3;
|
||||
v_vatom(j,4) += 0.5*v4;
|
||||
v_vatom(j,5) += 0.5*v5;
|
||||
}
|
||||
} else {
|
||||
v_vatom(i,0) += 0.5*v0;
|
||||
v_vatom(i,1) += 0.5*v1;
|
||||
v_vatom(i,2) += 0.5*v2;
|
||||
v_vatom(i,3) += 0.5*v3;
|
||||
v_vatom(i,4) += 0.5*v4;
|
||||
v_vatom(i,5) += 0.5*v5;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairMultiLucyRXKokkos<DeviceType>::create_kokkos_tables()
|
||||
{
|
||||
const int tlm1 = tablength-1;
|
||||
|
||||
memory->create_kokkos(d_table->innersq,h_table->innersq,ntables,"Table::innersq");
|
||||
memory->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta");
|
||||
|
||||
if(tabstyle == LOOKUP) {
|
||||
memory->create_kokkos(d_table->e,h_table->e,ntables,tlm1,"Table::e");
|
||||
memory->create_kokkos(d_table->f,h_table->f,ntables,tlm1,"Table::f");
|
||||
}
|
||||
|
||||
if(tabstyle == LINEAR) {
|
||||
memory->create_kokkos(d_table->rsq,h_table->rsq,ntables,tablength,"Table::rsq");
|
||||
memory->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e");
|
||||
memory->create_kokkos(d_table->f,h_table->f,ntables,tablength,"Table::f");
|
||||
memory->create_kokkos(d_table->de,h_table->de,ntables,tlm1,"Table::de");
|
||||
memory->create_kokkos(d_table->df,h_table->df,ntables,tlm1,"Table::df");
|
||||
}
|
||||
|
||||
for(int i=0; i < ntables; i++) {
|
||||
Table* tb = &tables[i];
|
||||
|
||||
h_table->innersq[i] = tb->innersq;
|
||||
h_table->invdelta[i] = tb->invdelta;
|
||||
|
||||
for(int j = 0; j<h_table->rsq.dimension_1(); j++)
|
||||
h_table->rsq(i,j) = tb->rsq[j];
|
||||
for(int j = 0; j<h_table->e.dimension_1(); j++)
|
||||
h_table->e(i,j) = tb->e[j];
|
||||
for(int j = 0; j<h_table->de.dimension_1(); j++)
|
||||
h_table->de(i,j) = tb->de[j];
|
||||
for(int j = 0; j<h_table->f.dimension_1(); j++)
|
||||
h_table->f(i,j) = tb->f[j];
|
||||
for(int j = 0; j<h_table->df.dimension_1(); j++)
|
||||
h_table->df(i,j) = tb->df[j];
|
||||
}
|
||||
|
||||
|
||||
Kokkos::deep_copy(d_table->innersq,h_table->innersq);
|
||||
Kokkos::deep_copy(d_table->invdelta,h_table->invdelta);
|
||||
Kokkos::deep_copy(d_table->rsq,h_table->rsq);
|
||||
Kokkos::deep_copy(d_table->e,h_table->e);
|
||||
Kokkos::deep_copy(d_table->de,h_table->de);
|
||||
Kokkos::deep_copy(d_table->f,h_table->f);
|
||||
Kokkos::deep_copy(d_table->df,h_table->df);
|
||||
Kokkos::deep_copy(d_table->tabindex,h_table->tabindex);
|
||||
|
||||
d_table_const.innersq = d_table->innersq;
|
||||
d_table_const.invdelta = d_table->invdelta;
|
||||
d_table_const.rsq = d_table->rsq;
|
||||
d_table_const.e = d_table->e;
|
||||
d_table_const.de = d_table->de;
|
||||
d_table_const.f = d_table->f;
|
||||
d_table_const.df = d_table->df;
|
||||
|
||||
update_table = 0;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
allocate all arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairMultiLucyRXKokkos<DeviceType>::allocate()
|
||||
{
|
||||
allocated = 1;
|
||||
const int nt = atom->ntypes + 1;
|
||||
|
||||
memory->create(setflag,nt,nt,"pair:setflag");
|
||||
|
||||
memory->create_kokkos(k_cutsq,cutsq,nt,nt,"pair:cutsq");
|
||||
d_cutsq = k_cutsq.template view<DeviceType>();
|
||||
k_cutsq.template modify<LMPHostType>();
|
||||
|
||||
memory->create_kokkos(d_table->tabindex,h_table->tabindex,tabindex,nt,nt,"pair:tabindex");
|
||||
d_table_const.tabindex = d_table->tabindex;
|
||||
|
||||
memset(&setflag[0][0],0,nt*nt*sizeof(int));
|
||||
memset(&cutsq[0][0],0,nt*nt*sizeof(double));
|
||||
memset(&tabindex[0][0],0,nt*nt*sizeof(int));
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
global settings
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairMultiLucyRXKokkos<DeviceType>::settings(int narg, char **arg)
|
||||
{
|
||||
if (narg < 2) error->all(FLERR,"Illegal pair_style command");
|
||||
|
||||
// new settings
|
||||
|
||||
if (strcmp(arg[0],"lookup") == 0) tabstyle = LOOKUP;
|
||||
else if (strcmp(arg[0],"linear") == 0) tabstyle = LINEAR;
|
||||
else error->all(FLERR,"Unknown table style in pair_style command");
|
||||
|
||||
tablength = force->inumeric(FLERR,arg[1]);
|
||||
if (tablength < 2) error->all(FLERR,"Illegal number of pair table entries");
|
||||
|
||||
// optional keywords
|
||||
|
||||
int iarg = 2;
|
||||
while (iarg < narg) {
|
||||
if (strcmp(arg[iarg],"fractional") == 0) fractionalWeighting = true;
|
||||
else if (strcmp(arg[iarg],"molecular") == 0) fractionalWeighting = false;
|
||||
else error->all(FLERR,"Illegal pair_style command");
|
||||
iarg++;
|
||||
}
|
||||
|
||||
// delete old tables, since cannot just change settings
|
||||
|
||||
for (int m = 0; m < ntables; m++) free_table(&tables[m]);
|
||||
memory->sfree(tables);
|
||||
|
||||
if (allocated) {
|
||||
memory->destroy(setflag);
|
||||
|
||||
d_table_const.tabindex = d_table->tabindex = typename ArrayTypes<DeviceType>::t_int_2d();
|
||||
h_table->tabindex = typename ArrayTypes<LMPHostType>::t_int_2d();
|
||||
}
|
||||
allocated = 0;
|
||||
|
||||
ntables = 0;
|
||||
tables = NULL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class PairMultiLucyRXKokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class PairMultiLucyRXKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
|
@ -0,0 +1,266 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
// Pair style names provided by this header: the /kk and /kk/device names use
// the LMPDeviceType instantiation, /kk/host uses the LMPHostType one.
// (PairStyle is a macro defined elsewhere in LAMMPS.)
PairStyle(multi/lucy/rx/kk,PairMultiLucyRXKokkos<LMPDeviceType>)
|
||||
PairStyle(multi/lucy/rx/kk/device,PairMultiLucyRXKokkos<LMPDeviceType>)
|
||||
PairStyle(multi/lucy/rx/kk/host,PairMultiLucyRXKokkos<LMPHostType>)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_MULTI_LUCY_RX_KOKKOS_H
|
||||
#define LMP_PAIR_MULTI_LUCY_RX_KOKKOS_H
|
||||
|
||||
|
||||
#include "pair_multi_lucy_rx.h"
|
||||
#include "pair_kokkos.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// Empty tag types used to select an operator() overload of
// PairMultiLucyRXKokkos.

// forward-communication pack / unpack kernels
struct TagPairMultiLucyRXPackForwardComm {};
struct TagPairMultiLucyRXUnpackForwardComm {};

// tag of the mixing-weight operator() declared on the class
struct TagPairMultiLucyRXgetMixingWeights {};

// force kernel, specialised on neighbor list style, newton flag,
// energy/virial flag and table style
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int TABSTYLE>
struct TagPairMultiLucyRXCompute {};

// kernel that zeroes the density
struct TagPairMultiLucyRXZero {};

// local-density kernel, specialised on neighbor list style, newton flag
// and the single-atom-type shortcut
template<int NEIGHFLAG, int NEWTON_PAIR, bool ONE_TYPE>
struct TagPairMultiLucyRXComputeLocalDensity {};
|
||||
|
||||
template<class DeviceType>
|
||||
class PairMultiLucyRXKokkos : public PairMultiLucyRX {
|
||||
public:
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
typedef EV_FLOAT value_type;
|
||||
|
||||
PairMultiLucyRXKokkos(class LAMMPS *);
|
||||
virtual ~PairMultiLucyRXKokkos();
|
||||
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
|
||||
template<int TABSTYLE>
|
||||
void compute_style(int, int);
|
||||
|
||||
void init_style();
|
||||
int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&,
|
||||
int, int *);
|
||||
void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&);
|
||||
int pack_forward_comm(int, int *, double *, int, int *);
|
||||
void unpack_forward_comm(int, int, double *);
|
||||
int pack_reverse_comm(int, int, double *);
|
||||
void unpack_reverse_comm(int, int *, double *);
|
||||
void computeLocalDensity();
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairMultiLucyRXPackForwardComm, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairMultiLucyRXUnpackForwardComm, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairMultiLucyRXgetMixingWeights, const int&) const;
|
||||
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int TABSTYLE>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>, const int&, EV_FLOAT&) const;
|
||||
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int TABSTYLE>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairMultiLucyRXZero, const int&) const;
|
||||
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR, bool ONE_TYPE>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairMultiLucyRXComputeLocalDensity<NEIGHFLAG,NEWTON_PAIR,ONE_TYPE>, const int&) const;
|
||||
|
||||
template<int NEIGHFLAG, int NEWTON_PAIR>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
|
||||
const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
|
||||
const F_FLOAT &dely, const F_FLOAT &delz) const;
|
||||
|
||||
private:
|
||||
int nlocal;
|
||||
int neighflag;
|
||||
int eflag,vflag;
|
||||
|
||||
double cutsq_type11;
|
||||
double rcut_type11;
|
||||
double factor_type11;
|
||||
|
||||
enum{LOOKUP,LINEAR,SPLINE,BITMAP};
|
||||
|
||||
//struct Table {
|
||||
// int ninput,rflag,fpflag,match;
|
||||
// double rlo,rhi,fplo,fphi,cut;
|
||||
// double *rfile,*efile,*ffile;
|
||||
// double *e2file,*f2file;
|
||||
// double innersq,delta,invdelta,deltasq6;
|
||||
// double *rsq,*drsq,*e,*de,*f,*df,*e2,*f2;
|
||||
//};
|
||||
|
||||
/*struct TableDeviceConst {
|
||||
typename AT::t_int_2d_randomread tabindex;
|
||||
typename AT::t_ffloat_1d_randomread innersq,invdelta;
|
||||
typename AT::t_ffloat_2d_randomread rsq,e,de,f,df;
|
||||
};*/
|
||||
//Its faster not to use texture fetch if the number of tables is less than 32!
|
||||
struct TableDeviceConst {
|
||||
typename AT::t_int_2d tabindex;
|
||||
typename AT::t_ffloat_1d innersq,invdelta;
|
||||
typename AT::t_ffloat_2d_randomread rsq,e,de,f,df;
|
||||
};
|
||||
|
||||
struct TableDevice {
|
||||
typename AT::t_int_2d tabindex;
|
||||
typename AT::t_ffloat_1d innersq,invdelta;
|
||||
typename AT::t_ffloat_2d rsq,e,de,f,df;
|
||||
};
|
||||
|
||||
struct TableHost {
|
||||
HAT::t_int_2d tabindex;
|
||||
HAT::t_ffloat_1d innersq,invdelta;
|
||||
HAT::t_ffloat_2d rsq,e,de,f,df;
|
||||
};
|
||||
|
||||
TableDeviceConst d_table_const;
|
||||
TableDevice* d_table;
|
||||
TableHost* h_table;
|
||||
|
||||
F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
|
||||
|
||||
void allocate();
|
||||
int update_table;
|
||||
void create_kokkos_tables();
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void getMixingWeights(int, double &, double &, double &, double &) const;
|
||||
|
||||
typename AT::t_float_1d d_mixWtSite1old,d_mixWtSite2old,d_mixWtSite1,d_mixWtSite2;
|
||||
|
||||
typename AT::t_x_array_randomread x;
|
||||
typename AT::t_f_array f;
|
||||
typename AT::t_int_1d_randomread type;
|
||||
typename AT::t_efloat_1d rho;
|
||||
typename HAT::t_efloat_1d h_rho;
|
||||
typename AT::t_efloat_1d uCG, uCGnew;
|
||||
typename AT::t_float_2d dvector;
|
||||
|
||||
DAT::tdual_efloat_1d k_eatom;
|
||||
DAT::tdual_virial_array k_vatom;
|
||||
typename AT::t_efloat_1d d_eatom;
|
||||
typename AT::t_virial_array d_vatom;
|
||||
|
||||
typename AT::t_neighbors_2d d_neighbors;
|
||||
typename AT::t_int_1d_randomread d_ilist;
|
||||
typename AT::t_int_1d_randomread d_numneigh;
|
||||
|
||||
DAT::tdual_int_scalar k_error_flag;
|
||||
|
||||
typename AT::tdual_ffloat_2d k_cutsq;
|
||||
typename AT::t_ffloat_2d d_cutsq;
|
||||
|
||||
int iswap;
|
||||
int first;
|
||||
typename AT::t_int_2d d_sendlist;
|
||||
typename AT::t_xfloat_1d_um v_buf;
|
||||
|
||||
friend void pair_virial_fdotr_compute<PairMultiLucyRXKokkos>(PairMultiLucyRXKokkos*);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Pair multi/lucy/rx command requires atom_style with density (e.g. dpd, meso)
|
||||
|
||||
Self-explanatory
|
||||
|
||||
E: Density < table inner cutoff
|
||||
|
||||
The local density is smaller than the inner cutoff of the table.
|
||||
|
||||
E: Density > table inner cutoff
|
||||
|
||||
The local density is greater than the inner cutoff of the table.
|
||||
|
||||
E: Only LOOKUP and LINEAR table styles have been implemented for pair multi/lucy/rx
|
||||
|
||||
Self-explanatory
|
||||
|
||||
E: Illegal ... command
|
||||
|
||||
Self-explanatory. Check the input script syntax and compare to the
|
||||
documentation for the command. You can use -echo screen as a
|
||||
command-line option when running LAMMPS to see the offending line.
|
||||
|
||||
E: Unknown table style in pair_style command
|
||||
|
||||
Self-explanatory
|
||||
|
||||
E: Illegal number of pair table entries
|
||||
|
||||
There must be at least 2 table entries.
|
||||
|
||||
E: Illegal pair_coeff command
|
||||
|
||||
All pair coefficients must be set in the data file or by the
|
||||
pair_coeff command before running a simulation.
|
||||
|
||||
E: PairMultiLucyRXKokkos requires a fix rx command
|
||||
|
||||
The fix rx command must come before the pair style command in the input file
|
||||
|
||||
E: There are no rx species specified
|
||||
|
||||
There must be at least one species specified through the fix rx command
|
||||
|
||||
E: Invalid pair table length
|
||||
|
||||
Length of read-in pair table is invalid
|
||||
|
||||
E: All pair coeffs are not set
|
||||
|
||||
All pair coefficients must be set in the data file or by the
|
||||
pair_coeff command before running a simulation.
|
||||
|
||||
E: Cannot open file %s
|
||||
|
||||
The specified file cannot be opened. Check that the path and name are
|
||||
correct.
|
||||
|
||||
E: Did not find keyword in table file
|
||||
|
||||
Keyword used in pair_coeff command was not found in table file.
|
||||
|
||||
E: Invalid keyword in pair table parameters
|
||||
|
||||
Keyword used in list of table parameters is not recognized.
|
||||
|
||||
E: Pair table parameters did not set N
|
||||
|
||||
List of pair table parameters must include N setting.
|
||||
|
||||
*/
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,122 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(table/rx/kk,PairTableRXKokkos<LMPDeviceType>)
|
||||
PairStyle(table/rx/kk/device,PairTableRXKokkos<LMPDeviceType>)
|
||||
PairStyle(table/rx/kk/host,PairTableRXKokkos<LMPHostType>)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_TABLE_RX_KOKKOS_H
|
||||
#define LMP_PAIR_TABLE_RX_KOKKOS_H
|
||||
|
||||
#include "pair_table_kokkos.h"
|
||||
#include "kokkos_few.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class DeviceType>
|
||||
class PairTableRXKokkos : public PairTable {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
|
||||
typedef DeviceType device_type;
|
||||
|
||||
PairTableRXKokkos(class LAMMPS *);
|
||||
virtual ~PairTableRXKokkos();
|
||||
|
||||
virtual void compute(int, int);
|
||||
|
||||
template<int TABSTYLE>
|
||||
void compute_style(int, int);
|
||||
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
double init_one(int, int);
|
||||
virtual double single(int, int, int, int, double, double, double, double &);
|
||||
|
||||
void init_style();
|
||||
|
||||
struct TableDeviceConst {
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d cutsq;
|
||||
typename ArrayTypes<DeviceType>::t_int_2d tabindex;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d nshiftbits,nmask;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_1d innersq,invdelta,deltasq6;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d_randomread rsq,drsq,e,de,f,df,e2,f2;
|
||||
};
|
||||
|
||||
struct TableDevice {
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d cutsq;
|
||||
typename ArrayTypes<DeviceType>::t_int_2d tabindex;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d nshiftbits,nmask;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_1d innersq,invdelta,deltasq6;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2;
|
||||
};
|
||||
|
||||
struct TableHost {
|
||||
typename ArrayTypes<LMPHostType>::t_ffloat_2d cutsq;
|
||||
typename ArrayTypes<LMPHostType>::t_int_2d tabindex;
|
||||
typename ArrayTypes<LMPHostType>::t_int_1d nshiftbits,nmask;
|
||||
typename ArrayTypes<LMPHostType>::t_ffloat_1d innersq,invdelta,deltasq6;
|
||||
typename ArrayTypes<LMPHostType>::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2;
|
||||
};
|
||||
|
||||
TableDeviceConst d_table_const;
|
||||
TableDevice* d_table;
|
||||
TableHost* h_table;
|
||||
|
||||
Few<Few<F_FLOAT, MAX_TYPES_STACKPARAMS+1>, MAX_TYPES_STACKPARAMS+1> m_cutsq;
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
|
||||
|
||||
virtual void allocate();
|
||||
void compute_table(Table *);
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_x_array_randomread x;
|
||||
typename ArrayTypes<DeviceType>::t_f_array f;
|
||||
|
||||
int neighflag;
|
||||
|
||||
int update_table;
|
||||
void create_kokkos_tables();
|
||||
void cleanup_copy();
|
||||
|
||||
friend void pair_virial_fdotr_compute<PairTableRXKokkos>(PairTableRXKokkos*);
|
||||
|
||||
/* PairTableRX members */
|
||||
|
||||
Kokkos::View<double*, DeviceType> mixWtSite1old;
|
||||
Kokkos::View<double*, DeviceType> mixWtSite2old;
|
||||
Kokkos::View<double*, DeviceType> mixWtSite1;
|
||||
Kokkos::View<double*, DeviceType> mixWtSite2;
|
||||
|
||||
int nspecies;
|
||||
char *site1, *site2;
|
||||
int isite1, isite2;
|
||||
bool fractionalWeighting;
|
||||
|
||||
typename ArrayTypes<DeviceType>::tdual_efloat_1d k_eatom;
|
||||
typename ArrayTypes<DeviceType>::tdual_virial_array k_vatom;
|
||||
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
|
||||
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
|
@ -0,0 +1,72 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "comm.h"
|
||||
#include "rand_pool_wrap_kokkos.h"
|
||||
#include "lammps.h"
|
||||
#include "kokkos.h"
|
||||
#include "random_mars.h"
|
||||
#include "update.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Construct an empty pool: no generators are allocated until init() is
// called.  The first (int) argument is accepted but unused.
// NOTE(review): presumably kept for signature compatibility with other
// random pool classes -- confirm.
RandPoolWrap::RandPoolWrap(int, LAMMPS *lmp)
  : Pointers(lmp),
    random_thr(NULL),
    nthreads(lmp->kokkos->num_threads)
{
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Releases nothing: the generator pool is freed only through destroy().
// NOTE(review): presumably so that copies of this object do not free the
// shared pool when they go out of scope -- confirm.
RandPoolWrap::~RandPoolWrap() {}
|
||||
|
||||
void RandPoolWrap::destroy()
|
||||
{
|
||||
if (random_thr) {
|
||||
for (int i=1; i < nthreads; ++i)
|
||||
delete random_thr[i];
|
||||
|
||||
delete[] random_thr;
|
||||
random_thr = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// (Re)build the pool of per-thread random number generators.
//
//   random : generator stored in slot 0; it is not owned by the pool
//   seed   : base seed from which the seeds of slots 1..nthreads-1 derive
//
// Any previously allocated pool is released first.
void RandPoolWrap::init(RanMars* random, int seed)
{
  // deallocate pool of RNGs
  // destroy() uses the old nthreads and resets random_thr to NULL, so the
  // pointer never dangles if the allocation below fails
  destroy();

  // allocate pool of RNGs
  // generate a random number generator instance for
  // all threads != 0. make sure we use unique seeds.
  nthreads = lmp->kokkos->num_threads;
  random_thr = new RanMars*[nthreads];
  for (int tid = 1; tid < nthreads; ++tid) {
    // seed is distinct for every (MPI rank, thread id) pair
    random_thr[tid] = new RanMars(lmp, seed + comm->me
                                  + comm->nprocs*tid);
  }

  // to ensure full compatibility with the serial style
  // we use the serial random number generator instance for thread 0
  random_thr[0] = random;
}
|
|
@ -0,0 +1,83 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef RAND_POOL_WRAP_H
|
||||
#define RAND_POOL_WRAP_H
|
||||
|
||||
#include "pointers.h"
|
||||
#include "kokkos_type.h"
|
||||
#include "random_mars.h"
|
||||
#include "error.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
struct RandWrap {
|
||||
class RanMars* rng;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
RandWrap() {
|
||||
rng = NULL;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double drand() {
|
||||
return rng->uniform();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double normal() {
|
||||
return rng->gaussian();
|
||||
}
|
||||
};
|
||||
|
||||
class RandPoolWrap : protected Pointers {
|
||||
public:
|
||||
RandPoolWrap(int, class LAMMPS *);
|
||||
~RandPoolWrap();
|
||||
void destroy();
|
||||
void init(RanMars*, int);
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
RandWrap get_state() const
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
error->all(FLERR,"Cannot use Marsaglia RNG with GPUs");
|
||||
#endif
|
||||
|
||||
RandWrap rand_wrap;
|
||||
int tid = 0;
|
||||
#ifndef KOKKOS_HAVE_CUDA
|
||||
tid = LMPDeviceType::hardware_thread_id();
|
||||
#endif
|
||||
rand_wrap.rng = random_thr[tid];
|
||||
return rand_wrap;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void free_state(RandWrap) const
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
class RanMars **random_thr;
|
||||
int nthreads;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
|
@ -67,7 +67,6 @@ void RegBlockKokkos<DeviceType>::match_all_kokkos(int groupbit_in, DAT::tdual_in
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagRegBlockMatchAll>(0,nlocal),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
k_match_in.template modify<DeviceType>();
|
||||
|
|
|
@ -0,0 +1,116 @@
|
|||
# mpi = MPI with its default compiler
|
||||
|
||||
SHELL = /bin/sh
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# compiler/linker settings
|
||||
# specify flags and libraries needed for your compiler
|
||||
|
||||
CC = mpicxx
|
||||
CCFLAGS = -g -O3 -Wall -Wextra -frounding-math -fsignaling-nans -march=native
|
||||
SHFLAGS = -shared -MD -mcmodel=medium -fpic -fPIC
|
||||
DEPFLAGS = -M
|
||||
|
||||
LINK = mpicxx
|
||||
LINKFLAGS = -g -O
|
||||
LIB =
|
||||
SIZE = size
|
||||
|
||||
ARCHIVE = ar
|
||||
ARFLAGS = -rc
|
||||
SHLIBFLAGS = -shared
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# LAMMPS-specific settings, all OPTIONAL
|
||||
# specify settings for LAMMPS features you will use
|
||||
# if you change any -D setting, do full re-compile after "make clean"
|
||||
|
||||
# LAMMPS ifdef settings
|
||||
# see possible settings in Section 2.2 (step 4) of manual
|
||||
|
||||
LMP_INC = -DLAMMPS_GZIP
|
||||
#LMP_INC += -DLAMMPS_JPEG
|
||||
LMP_INC += -DLAMMPS_MEMALIGN=64
|
||||
|
||||
# MPI library
|
||||
# see discussion in Section 2.2 (step 5) of manual
|
||||
# MPI wrapper compiler/linker can provide this info
|
||||
# can point to dummy MPI library in src/STUBS as in Makefile.serial
|
||||
# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
|
||||
# INC = path for mpi.h, MPI compiler settings
|
||||
# PATH = path for MPI library
|
||||
# LIB = name of MPI library
|
||||
|
||||
MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1
|
||||
MPI_PATH =
|
||||
MPI_LIB =
|
||||
|
||||
# FFT library
|
||||
# see discussion in Section 2.2 (step 6) of manual
|
||||
# can be left blank to use provided KISS FFT library
|
||||
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
|
||||
# PATH = path for FFT library
|
||||
# LIB = name of FFT library
|
||||
|
||||
FFT_INC =
|
||||
FFT_PATH =
|
||||
FFT_LIB =
|
||||
|
||||
# JPEG and/or PNG library
|
||||
# see discussion in Section 2.2 (step 7) of manual
|
||||
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
|
||||
# INC = path(s) for jpeglib.h and/or png.h
|
||||
# PATH = path(s) for JPEG library and/or PNG library
|
||||
# LIB = name(s) of JPEG library and/or PNG library
|
||||
|
||||
JPG_INC =
|
||||
JPG_PATH =
|
||||
JPG_LIB =
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# build rules and dependencies
|
||||
# do not edit this section
|
||||
|
||||
include Makefile.package.settings
|
||||
include Makefile.package
|
||||
|
||||
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
|
||||
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
|
||||
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
|
||||
EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS)
|
||||
EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS)
|
||||
|
||||
# Path to src files
|
||||
|
||||
vpath %.cpp ..
|
||||
vpath %.h ..
|
||||
|
||||
# Link target
|
||||
|
||||
$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS)
|
||||
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
|
||||
$(SIZE) $(EXE)
|
||||
|
||||
# Library targets
|
||||
|
||||
lib: $(OBJ) $(EXTRA_LINK_DEPENDS)
|
||||
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
|
||||
|
||||
shlib: $(OBJ) $(EXTRA_LINK_DEPENDS)
|
||||
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
|
||||
$(OBJ) $(EXTRA_LIB) $(LIB)
|
||||
|
||||
# Compilation rules
|
||||
|
||||
%.o:%.cpp
|
||||
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
# Individual dependencies
|
||||
|
||||
depend : fastdep.exe $(SRC)
|
||||
@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
|
||||
|
||||
fastdep.exe: ../DEPEND/fastdep.c
|
||||
cc -O -o $@ $<
|
||||
|
||||
sinclude .depend
|
|
@ -542,8 +542,8 @@ void DumpCustomMPIIO::write_string(int n, double *mybuf)
|
|||
|
||||
#if defined(_OPENMP)
|
||||
int nthreads = omp_get_max_threads();
|
||||
if (nthreads > 1)
|
||||
nsme = convert_string_omp(n,mybuf);
|
||||
if ((nthreads > 1) && !(lmp->kokkos))
|
||||
nsme = convert_string_omp(n,mybuf); // not (yet) compatible with Kokkos
|
||||
else
|
||||
nsme = convert_string(n,mybuf);
|
||||
#else
|
||||
|
|
|
@ -16,6 +16,9 @@ style_region.h
|
|||
style_neigh_bin.h
|
||||
style_neigh_pair.h
|
||||
style_neigh_stencil.h
|
||||
# deleted on 5 September 2017
|
||||
npair_halffull_newton_ssa.cpp
|
||||
npair_halffull_newton_ssa.h
|
||||
# deleted on 6 June 2017
|
||||
pair_lj_sf.cpp
|
||||
pair_lj_sf.h
|
||||
|
|
|
@ -34,6 +34,8 @@ FixDPDenergy::FixDPDenergy(LAMMPS *lmp, int narg, char **arg) :
|
|||
|
||||
pairDPDE = NULL;
|
||||
pairDPDE = (PairDPDfdtEnergy *) force->pair_match("dpd/fdt/energy",1);
|
||||
if (pairDPDE == NULL)
|
||||
pairDPDE = (PairDPDfdtEnergy *) force->pair_match("dpd/fdt/energy/kk",1);
|
||||
|
||||
if (pairDPDE == NULL)
|
||||
error->all(FLERR,"Must use pair_style dpd/fdt/energy with fix dpd/energy");
|
||||
|
|
|
@ -150,6 +150,8 @@ FixEOStableRX::FixEOStableRX(LAMMPS *lmp, int narg, char **arg) :
|
|||
|
||||
FixEOStableRX::~FixEOStableRX()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
for (int m = 0; m < ntables; m++) {
|
||||
free_table(&tables[m]);
|
||||
free_table(&tables2[m]);
|
||||
|
|
|
@ -220,6 +220,9 @@ FixRX::FixRX(LAMMPS *lmp, int narg, char **arg) :
|
|||
|
||||
FixRX::~FixRX()
|
||||
{
|
||||
//printf("Inside FixRX::~FixRX copymode= %d\n", copymode);
|
||||
if (copymode) return;
|
||||
|
||||
// De-Allocate memory to prevent memory leak
|
||||
for (int ii = 0; ii < nreactions; ii++){
|
||||
delete [] stoich[ii];
|
||||
|
@ -370,11 +373,11 @@ void FixRX::post_constructor()
|
|||
newarg2[nspecies+3] = (char *) "ghost";
|
||||
newarg2[nspecies+4] = (char *) "yes";
|
||||
|
||||
modify->add_fix(nspecies+5,newarg);
|
||||
modify->add_fix(nspecies+5,newarg,1);
|
||||
fix_species = (FixPropertyAtom *) modify->fix[modify->nfix-1];
|
||||
restartFlag = modify->fix[modify->nfix-1]->restart_reset;
|
||||
|
||||
modify->add_fix(nspecies+5,newarg2);
|
||||
modify->add_fix(nspecies+5,newarg2,1);
|
||||
fix_species_old = (FixPropertyAtom *) modify->fix[modify->nfix-1];
|
||||
|
||||
if(nspecies==0) error->all(FLERR,"There are no rx species specified.");
|
||||
|
@ -634,6 +637,9 @@ int FixRX::setmask()
|
|||
void FixRX::init()
|
||||
{
|
||||
pairDPDE = (PairDPDfdtEnergy *) force->pair_match("dpd/fdt/energy",1);
|
||||
if (pairDPDE == NULL)
|
||||
pairDPDE = (PairDPDfdtEnergy *) force->pair_match("dpd/fdt/energy/kk",1);
|
||||
|
||||
if (pairDPDE == NULL)
|
||||
error->all(FLERR,"Must use pair_style dpd/fdt/energy with fix rx");
|
||||
|
||||
|
@ -669,7 +675,17 @@ void FixRX::setup_pre_force(int vflag)
|
|||
|
||||
if(restartFlag){
|
||||
restartFlag = 0;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
int ode_counter[4] = {0};
|
||||
|
||||
UserRHSData userData;
|
||||
userData.kFor = new double[nreactions];
|
||||
userData.rxnRateLaw = new double[nreactions];
|
||||
|
||||
double *rwork = new double[8*nspecies];
|
||||
|
||||
if(localTempFlag){
|
||||
int count = nlocal + (newton_pair ? nghost : 0);
|
||||
dpdThetaLocal = new double[count];
|
||||
|
@ -682,22 +698,27 @@ void FixRX::setup_pre_force(int vflag)
|
|||
tmp = atom->dvector[ispecies][id];
|
||||
atom->dvector[ispecies+nspecies][id] = tmp;
|
||||
}
|
||||
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit){
|
||||
|
||||
// Set the reaction rate constants to zero: no reactions occur at step 0
|
||||
for(int irxn=0;irxn<nreactions;irxn++)
|
||||
kR[irxn] = 0.0;
|
||||
userData.kFor[irxn] = 0.0;
|
||||
|
||||
if (odeIntegrationFlag == ODE_LAMMPS_RK4)
|
||||
rk4(i,NULL);
|
||||
rk4(i, rwork, &userData);
|
||||
else if (odeIntegrationFlag == ODE_LAMMPS_RKF45)
|
||||
rkf45(i,NULL);
|
||||
rkf45(i, rwork, &userData, ode_counter);
|
||||
}
|
||||
|
||||
// Communicate the updated momenta and velocities to all nodes
|
||||
comm->forward_comm_fix(this);
|
||||
if(localTempFlag) delete [] dpdThetaLocal;
|
||||
|
||||
delete [] userData.kFor;
|
||||
delete [] userData.rxnRateLaw;
|
||||
delete [] rwork;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -705,12 +726,13 @@ void FixRX::setup_pre_force(int vflag)
|
|||
|
||||
void FixRX::pre_force(int vflag)
|
||||
{
|
||||
TimerType timer_start = getTimeStamp();
|
||||
|
||||
int nlocal = atom->nlocal;
|
||||
int nghost = atom->nghost;
|
||||
int *mask = atom->mask;
|
||||
double *dpdTheta = atom->dpdTheta;
|
||||
int newton_pair = force->newton_pair;
|
||||
double theta;
|
||||
|
||||
if(localTempFlag){
|
||||
int count = nlocal + (newton_pair ? nghost : 0);
|
||||
|
@ -722,7 +744,10 @@ void FixRX::pre_force(int vflag)
|
|||
TimerType timer_localTemperature = getTimeStamp();
|
||||
|
||||
// Zero the counters for the ODE solvers.
|
||||
this->nSteps = this->nIters = this->nFuncs = this->nFails = 0;
|
||||
int nSteps = 0;
|
||||
int nIters = 0;
|
||||
int nFuncs = 0;
|
||||
int nFails = 0;
|
||||
|
||||
if (odeIntegrationFlag == ODE_LAMMPS_RKF45 && diagnosticFrequency == 1)
|
||||
{
|
||||
|
@ -730,10 +755,23 @@ void FixRX::pre_force(int vflag)
|
|||
memory->create( diagnosticCounterPerODE[FuncSum], nlocal, "FixRX::diagnosticCounterPerODE");
|
||||
}
|
||||
|
||||
double *rwork = new double[8*nspecies + nreactions];
|
||||
//#pragma omp parallel \
|
||||
// reduction(+: nSteps, nIters, nFuncs, nFails )
|
||||
{
|
||||
double *rwork = new double[8*nspecies];
|
||||
|
||||
UserRHSData userData;
|
||||
userData.kFor = new double[nreactions];
|
||||
userData.rxnRateLaw = new double[nreactions];
|
||||
|
||||
int ode_counter[4] = { 0 };
|
||||
|
||||
//#pragma omp for schedule(runtime)
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit){
|
||||
{
|
||||
if (mask[i] & groupbit)
|
||||
{
|
||||
double theta;
|
||||
if (localTempFlag)
|
||||
theta = dpdThetaLocal[i];
|
||||
else
|
||||
|
@ -741,24 +779,42 @@ void FixRX::pre_force(int vflag)
|
|||
|
||||
//Compute the reaction rate constants
|
||||
for (int irxn = 0; irxn < nreactions; irxn++)
|
||||
kR[irxn] = Arr[irxn]*pow(theta,nArr[irxn])*exp(-Ea[irxn]/force->boltz/theta);
|
||||
userData.kFor[irxn] = Arr[irxn]*pow(theta,nArr[irxn])*exp(-Ea[irxn]/force->boltz/theta);
|
||||
|
||||
if (odeIntegrationFlag == ODE_LAMMPS_RK4)
|
||||
rk4(i,rwork);
|
||||
rk4(i, rwork, &userData);
|
||||
else if (odeIntegrationFlag == ODE_LAMMPS_RKF45)
|
||||
rkf45(i,rwork);
|
||||
rkf45(i, rwork, &userData, ode_counter);
|
||||
}
|
||||
}
|
||||
|
||||
TimerType timer_ODE = getTimeStamp();
|
||||
nSteps += ode_counter[0];
|
||||
nIters += ode_counter[1];
|
||||
nFuncs += ode_counter[2];
|
||||
nFails += ode_counter[3];
|
||||
|
||||
delete [] rwork;
|
||||
delete [] userData.kFor;
|
||||
delete [] userData.rxnRateLaw;
|
||||
|
||||
} // end parallel region
|
||||
|
||||
TimerType timer_ODE = getTimeStamp();
|
||||
|
||||
// Communicate the updated momenta and velocities to all nodes
|
||||
comm->forward_comm_fix(this);
|
||||
if(localTempFlag) delete [] dpdThetaLocal;
|
||||
|
||||
TimerType timer_stop = getTimeStamp();
|
||||
|
||||
double time_ODE = getElapsedTime(timer_localTemperature, timer_ODE);
|
||||
|
||||
//printf("me= %d total= %g temp= %g ode= %g comm= %g nlocal= %d nfc= %d %d\n", comm->me,
|
||||
// getElapsedTime(timer_start, timer_stop),
|
||||
// getElapsedTime(timer_start, timer_localTemperature),
|
||||
// getElapsedTime(timer_localTemperature, timer_ODE),
|
||||
// getElapsedTime(timer_ODE, timer_stop), nlocal, nFuncs, nSteps);
|
||||
|
||||
// Warn the user if a failure was detected in the ODE solver.
|
||||
if (nFails > 0){
|
||||
char sbuf[128];
|
||||
|
@ -954,21 +1010,15 @@ void FixRX::setupParams()
|
|||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixRX::rk4(int id, double *rwork)
|
||||
void FixRX::rk4(int id, double *rwork, void* v_params)
|
||||
{
|
||||
double *k1 = NULL;
|
||||
if (rwork == NULL)
|
||||
k1 = new double[6*nspecies + nreactions];
|
||||
else
|
||||
k1 = rwork;
|
||||
double *k1 = rwork;
|
||||
double *k2 = k1 + nspecies;
|
||||
double *k3 = k2 + nspecies;
|
||||
double *k4 = k3 + nspecies;
|
||||
double *y = k4 + nspecies;
|
||||
double *yp = y + nspecies;
|
||||
|
||||
double *dummyArray = yp + nspecies; // Passed to the rhs function.
|
||||
|
||||
const int numSteps = minSteps;
|
||||
|
||||
const double h = update->dt / double(numSteps);
|
||||
|
@ -985,25 +1035,25 @@ void FixRX::rk4(int id, double *rwork)
|
|||
for (int step = 0; step < numSteps; step++)
|
||||
{
|
||||
// k1
|
||||
rhs(0.0,y,k1,dummyArray);
|
||||
rhs(0.0,y,k1,v_params);
|
||||
|
||||
// k2
|
||||
for (int ispecies = 0; ispecies < nspecies; ispecies++)
|
||||
yp[ispecies] = y[ispecies] + 0.5*h*k1[ispecies];
|
||||
|
||||
rhs(0.0,yp,k2,dummyArray);
|
||||
rhs(0.0,yp,k2,v_params);
|
||||
|
||||
// k3
|
||||
for (int ispecies = 0; ispecies < nspecies; ispecies++)
|
||||
yp[ispecies] = y[ispecies] + 0.5*h*k2[ispecies];
|
||||
|
||||
rhs(0.0,yp,k3,dummyArray);
|
||||
rhs(0.0,yp,k3,v_params);
|
||||
|
||||
// k4
|
||||
for (int ispecies = 0; ispecies < nspecies; ispecies++)
|
||||
yp[ispecies] = y[ispecies] + h*k3[ispecies];
|
||||
|
||||
rhs(0.0,yp,k4,dummyArray);
|
||||
rhs(0.0,yp,k4,v_params);
|
||||
|
||||
for (int ispecies = 0; ispecies < nspecies; ispecies++)
|
||||
y[ispecies] += h*(k1[ispecies]/6.0 + k2[ispecies]/3.0 + k3[ispecies]/3.0 + k4[ispecies]/6.0);
|
||||
|
@ -1018,9 +1068,6 @@ void FixRX::rk4(int id, double *rwork)
|
|||
y[ispecies] = 0.0;
|
||||
atom->dvector[ispecies][id] = y[ispecies];
|
||||
}
|
||||
|
||||
if (rwork == NULL)
|
||||
delete [] k1;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -1270,6 +1317,78 @@ void FixRX::odeDiagnostics(void)
|
|||
double max_per_proc[numCounters];
|
||||
double min_per_proc[numCounters];
|
||||
|
||||
if(1)
|
||||
{
|
||||
static bool firstStep = true;
|
||||
|
||||
static TimerType oldTimeStamp (-1);
|
||||
|
||||
TimerType now = getTimeStamp();
|
||||
|
||||
// Query the fix database and look for rx_weight for the balance fix.
|
||||
int type_flag = -1;
|
||||
int rx_weight_index = atom->find_custom( "rx_weight", /*0:int, 1:float*/ type_flag );
|
||||
|
||||
// Compute the average # of neighbors.
|
||||
double averageNumNeighbors = 0;
|
||||
{
|
||||
const int inum = pairDPDE->list->inum;
|
||||
const int* ilist = pairDPDE->list->ilist;
|
||||
const int* numneigh = pairDPDE->list->numneigh;
|
||||
|
||||
for (int ii = 0; ii < inum; ++ii)
|
||||
{
|
||||
const int i = ilist[ii];
|
||||
averageNumNeighbors += numneigh[i];
|
||||
}
|
||||
|
||||
averageNumNeighbors /= inum;
|
||||
}
|
||||
|
||||
printf("me= %d nst= %g nfc= %g time= %g nlocal= %g lmpnst= %g weight_idx= %d 1st= %d aveNeigh= %g\n", comm->me, this->diagnosticCounter[0], this->diagnosticCounter[1], this->diagnosticCounter[2], this->diagnosticCounter[3], this->diagnosticCounter[4], rx_weight_index, firstStep, averageNumNeighbors);
|
||||
|
||||
if (rx_weight_index != -1 && !firstStep && 0)
|
||||
{
|
||||
double *rx_weight = atom->dvector[rx_weight_index];
|
||||
|
||||
const int nlocal = atom->nlocal;
|
||||
const int *mask = atom->mask;
|
||||
|
||||
if (odeIntegrationFlag == ODE_LAMMPS_RKF45 && diagnosticFrequency == 1)
|
||||
{
|
||||
const double total_time = getElapsedTime( oldTimeStamp, now );
|
||||
const double fixrx_time = this->diagnosticCounter[TimeSum];
|
||||
const double time_ratio = fixrx_time / total_time;
|
||||
|
||||
double tsum = 0.0;
|
||||
double tmin = 100000, tmax = 0;
|
||||
for (int i = 0; i < nlocal; ++i)
|
||||
if (mask[i] & groupbit)
|
||||
{
|
||||
double nfunc_ratio = double( diagnosticCounterPerODE[FuncSum][i] ) / diagnosticCounter[FuncSum];
|
||||
rx_weight[i] = nfunc_ratio * fixrx_time + (total_time - fixrx_time) / nlocal;
|
||||
tmin = fmin( tmin, rx_weight[i] );
|
||||
tmax = fmax( tmax, rx_weight[i] );
|
||||
tsum += rx_weight[i];
|
||||
//rx_weight[i] = (double) diagnosticCounterPerODE[FuncSum][i];
|
||||
}
|
||||
|
||||
printf("me= %d total= %g fixrx= %g ratio= %g tsum= %g %g %g %g\n", comm->me, total_time, fixrx_time, time_ratio, tsum, (total_time - fixrx_time) / nlocal, tmin, tmax);
|
||||
}
|
||||
else
|
||||
{
|
||||
error->warning(FLERR, "Dynamic load balancing enabled but per-atom weights not available.");
|
||||
|
||||
for (int i = 0; i < nlocal; ++i)
|
||||
if (mask[i] & groupbit)
|
||||
rx_weight[i] = 1.0;
|
||||
}
|
||||
}
|
||||
|
||||
firstStep = false;
|
||||
oldTimeStamp = now;
|
||||
}
|
||||
|
||||
// Compute counters per dpd time-step.
|
||||
for (int i = 0; i < numCounters; ++i){
|
||||
my_vals[i] = this->diagnosticCounter[i] / nTimes;
|
||||
|
@ -1343,7 +1462,7 @@ void FixRX::odeDiagnostics(void)
|
|||
if (screen) fprintf(screen,"%s\n", smesg); \
|
||||
if (logfile) fprintf(logfile,"%s\n", smesg); }
|
||||
|
||||
sprintf(smesg, "FixRX::ODE Diagnostics: # of steps |# of rhs evals| run-time (sec)");
|
||||
sprintf(smesg, "FixRX::ODE Diagnostics: # of iters |# of rhs evals| run-time (sec) | # atoms");
|
||||
print_mesg(smesg);
|
||||
|
||||
sprintf(smesg, " AVG per ODE : %-12.5g | %-12.5g | %-12.5g", avg_per_atom[0], avg_per_atom[1], avg_per_atom[2]);
|
||||
|
@ -1365,7 +1484,7 @@ void FixRX::odeDiagnostics(void)
|
|||
print_mesg(smesg);
|
||||
}
|
||||
|
||||
sprintf(smesg, " AVG per Proc : %-12.5g | %-12.5g | %-12.5g", avg_per_proc[0], avg_per_proc[1], avg_per_proc[2]);
|
||||
sprintf(smesg, " AVG per Proc : %-12.5g | %-12.5g | %-12.5g | %-12.5g", avg_per_proc[StepSum], avg_per_proc[FuncSum], avg_per_proc[TimeSum], avg_per_proc[AtomSum]);
|
||||
print_mesg(smesg);
|
||||
|
||||
if (comm->nprocs > 1){
|
||||
|
@ -1373,13 +1492,13 @@ void FixRX::odeDiagnostics(void)
|
|||
for (int i = 0; i < numCounters; ++i)
|
||||
rms_per_proc[i] = sqrt( sum_sq[i] / comm->nprocs );
|
||||
|
||||
sprintf(smesg, " RMS per Proc : %-12.5g | %-12.5g | %-12.5g", rms_per_proc[0], rms_per_proc[1], rms_per_proc[2]);
|
||||
sprintf(smesg, " RMS per Proc : %-12.5g | %-12.5g | %-12.5g | %-12.5g", rms_per_proc[0], rms_per_proc[1], rms_per_proc[2], rms_per_proc[AtomSum]);
|
||||
print_mesg(smesg);
|
||||
|
||||
sprintf(smesg, " MAX per Proc : %-12.5g | %-12.5g | %-12.5g", max_per_proc[0], max_per_proc[1], max_per_proc[2]);
|
||||
sprintf(smesg, " MAX per Proc : %-12.5g | %-12.5g | %-12.5g | %-12.5g", max_per_proc[0], max_per_proc[1], max_per_proc[2], max_per_proc[AtomSum]);
|
||||
print_mesg(smesg);
|
||||
|
||||
sprintf(smesg, " MIN per Proc : %-12.5g | %-12.5g | %-12.5g", min_per_proc[0], min_per_proc[1], min_per_proc[2]);
|
||||
sprintf(smesg, " MIN per Proc : %-12.5g | %-12.5g | %-12.5g | %-12.5g", min_per_proc[0], min_per_proc[1], min_per_proc[2], min_per_proc[AtomSum]);
|
||||
print_mesg(smesg);
|
||||
}
|
||||
|
||||
|
@ -1399,7 +1518,7 @@ void FixRX::odeDiagnostics(void)
|
|||
return;
|
||||
}
|
||||
|
||||
void FixRX::rkf45(int id, double *rwork)
|
||||
void FixRX::rkf45(int id, double *rwork, void *v_param, int ode_counter[])
|
||||
{
|
||||
// Rounding coefficient.
|
||||
const double uround = DBL_EPSILON;
|
||||
|
@ -1408,12 +1527,7 @@ void FixRX::rkf45(int id, double *rwork)
|
|||
const double adaption_limit = 4.0;
|
||||
|
||||
//double *y = new double[8*nspecies + nreactions];
|
||||
double *y = NULL;
|
||||
if (rwork == NULL)
|
||||
y = new double[8*nspecies + nreactions];
|
||||
else
|
||||
y = rwork;
|
||||
double *rhstmp = y + 8*nspecies;
|
||||
double *y = rwork;
|
||||
|
||||
const int neq = nspecies;
|
||||
|
||||
|
@ -1450,7 +1564,7 @@ void FixRX::rkf45(int id, double *rwork)
|
|||
if (h < h_min){
|
||||
//fprintf(stderr,"hin not implemented yet\n");
|
||||
//exit(-1);
|
||||
nfe = rkf45_h0 (neq, t, t_stop, h_min, h_max, h, y, y + neq, rhstmp);
|
||||
nfe = rkf45_h0 (neq, t, t_stop, h_min, h_max, h, y, y + neq, v_param);
|
||||
}
|
||||
|
||||
//printf("t= %e t_stop= %e h= %e\n", t, t_stop, h);
|
||||
|
@ -1461,7 +1575,7 @@ void FixRX::rkf45(int id, double *rwork)
|
|||
double *eout = yout + neq;
|
||||
|
||||
// Take a trial step.
|
||||
rkf45_step (neq, h, y, yout, eout, rhstmp);
|
||||
rkf45_step (neq, h, y, yout, eout, v_param);
|
||||
|
||||
// Estimate the solution error.
|
||||
// ... weighted 2-norm of the error.
|
||||
|
@ -1509,16 +1623,17 @@ void FixRX::rkf45(int id, double *rwork)
|
|||
|
||||
if (maxIters && nit > maxIters){
|
||||
//fprintf(stderr,"atom[%d] took too many iterations in rkf45 %d %e %e\n", id, nit, t, t_stop);
|
||||
nFails ++;
|
||||
//nFails ++;
|
||||
ode_counter[3] ++;
|
||||
break;
|
||||
// We should set an error here so that the solution is not used!
|
||||
}
|
||||
|
||||
} // end while
|
||||
|
||||
nSteps += nst;
|
||||
nIters += nit;
|
||||
nFuncs += nfe;
|
||||
ode_counter[0] += nst;
|
||||
ode_counter[1] += nit;
|
||||
ode_counter[2] += nfe;
|
||||
|
||||
//if (diagnosticFrequency == 1 && diagnosticCounterPerODE[StepSum] != NULL)
|
||||
if (diagnosticCounterPerODE[StepSum] != NULL){
|
||||
|
@ -1535,9 +1650,6 @@ void FixRX::rkf45(int id, double *rwork)
|
|||
y[ispecies] = 0.0;
|
||||
atom->dvector[ispecies][id] = y[ispecies];
|
||||
}
|
||||
|
||||
if (rwork == NULL)
|
||||
delete [] y;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -1555,21 +1667,23 @@ int FixRX::rhs(double t, const double *y, double *dydt, void *params)
|
|||
|
||||
int FixRX::rhs_dense(double t, const double *y, double *dydt, void *params)
|
||||
{
|
||||
double rxnRateLawForward;
|
||||
double *rxnRateLaw = (double *) params;
|
||||
double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms;
|
||||
double concentration;
|
||||
int nspecies = atom->nspecies_dpd;
|
||||
UserRHSData *userData = (UserRHSData *) params;
|
||||
|
||||
double *rxnRateLaw = userData->rxnRateLaw;
|
||||
double *kFor = userData->kFor;
|
||||
|
||||
const double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms;
|
||||
const int nspecies = atom->nspecies_dpd;
|
||||
|
||||
for(int ispecies=0; ispecies<nspecies; ispecies++)
|
||||
dydt[ispecies] = 0.0;
|
||||
|
||||
// Construct the reaction rate laws
|
||||
for(int jrxn=0; jrxn<nreactions; jrxn++){
|
||||
rxnRateLawForward = kR[jrxn];
|
||||
double rxnRateLawForward = kFor[jrxn];
|
||||
|
||||
for(int ispecies=0; ispecies<nspecies; ispecies++){
|
||||
concentration = y[ispecies]/VDPD;
|
||||
const double concentration = y[ispecies]/VDPD;
|
||||
rxnRateLawForward *= pow(concentration,stoichReactants[jrxn][ispecies]);
|
||||
}
|
||||
rxnRateLaw[jrxn] = rxnRateLawForward;
|
||||
|
@ -1587,13 +1701,13 @@ int FixRX::rhs_dense(double t, const double *y, double *dydt, void *params)
|
|||
|
||||
int FixRX::rhs_sparse(double t, const double *y, double *dydt, void *v_params) const
|
||||
{
|
||||
double *_rxnRateLaw = (double *) v_params;
|
||||
UserRHSData *userData = (UserRHSData *) v_params;
|
||||
|
||||
const double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms;
|
||||
|
||||
#define kFor (this->kR)
|
||||
#define kFor (userData->kFor)
|
||||
#define kRev (NULL)
|
||||
#define rxnRateLaw (_rxnRateLaw)
|
||||
#define rxnRateLaw (userData->rxnRateLaw)
|
||||
#define conc (dydt)
|
||||
#define maxReactants (this->sparseKinetics_maxReactants)
|
||||
#define maxSpecies (this->sparseKinetics_maxSpecies)
|
||||
|
|
|
@ -66,14 +66,14 @@ class FixRX : public Fix {
|
|||
double *kR;
|
||||
|
||||
//!< Classic Runge-Kutta 4th-order stepper.
|
||||
void rk4(int,double*);
|
||||
void rk4(int, double*, void*);
|
||||
|
||||
//!< Runge-Kutta-Fehlberg ODE Solver.
|
||||
void rkf45(int,double*);
|
||||
void rkf45(int, double*, void*, int ode_counter[]);
|
||||
|
||||
//!< Runge-Kutta-Fehlberg ODE stepper function.
|
||||
void rkf45_step (const int neq, const double h, double y[], double y_out[],
|
||||
double rwk[], void* v_param);
|
||||
double rwk[], void *);
|
||||
|
||||
//!< Initial step size estimation for the Runge-Kutta-Fehlberg ODE solver.
|
||||
int rkf45_h0 (const int neq, const double t, const double t_stop,
|
||||
|
@ -90,6 +90,13 @@ class FixRX : public Fix {
|
|||
int rhs(double, const double *, double *, void *);
|
||||
int rhs_dense (double, const double *, double *, void *);
|
||||
|
||||
// Data bundle passed to the rhs functions through their opaque
// parameter pointer (they cast it back to UserRHSData*).
struct UserRHSData {
  double *kFor;        // forward rate coefficients, indexed by reaction
  double *rxnRateLaw;  // scratch array the rhs fills with one rate law per reaction
};
|
||||
|
||||
// Sparse stoichiometric matrix storage format and methods.
|
||||
bool useSparseKinetics;
|
||||
//SparseKinetics sparseKinetics;
|
||||
|
@ -116,10 +123,10 @@ class FixRX : public Fix {
|
|||
double relTol, absTol; //!< Relative and absolute tolerances for the ODE solver(s).
|
||||
|
||||
// ODE Diagnostics
|
||||
int nSteps; //!< # of accepted steps taken over all atoms.
|
||||
int nIters; //!< # of attemped steps for all atoms.
|
||||
int nFuncs; //!< # of RHS evaluations for all atoms.
|
||||
int nFails; //!< # of ODE systems that failed (for some reason).
|
||||
//int nSteps; //!< # of accepted steps taken over all atoms.
|
||||
//int nIters; //!< # of attemped steps for all atoms.
|
||||
//int nFuncs; //!< # of RHS evaluations for all atoms.
|
||||
//int nFails; //!< # of ODE systems that failed (for some reason).
|
||||
|
||||
int diagnosticFrequency; //!< Frequency (LMP steps) that run-time diagnostics will be printed to the log.
|
||||
enum { numDiagnosticCounters = 5 };
|
||||
|
|
|
@ -55,6 +55,7 @@
|
|||
#include "pair_dpd_fdt.h"
|
||||
#include "pair_dpd_fdt_energy.h"
|
||||
#include "pair.h"
|
||||
#include "npair_half_bin_newton_ssa.h"
|
||||
#include "citeme.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
@ -95,6 +96,8 @@ FixShardlow::FixShardlow(LAMMPS *lmp, int narg, char **arg) :
|
|||
pairDPDE = NULL;
|
||||
pairDPD = (PairDPDfdt *) force->pair_match("dpd/fdt",1);
|
||||
pairDPDE = (PairDPDfdtEnergy *) force->pair_match("dpd/fdt/energy",1);
|
||||
if (pairDPDE == NULL)
|
||||
pairDPDE = (PairDPDfdtEnergy *) force->pair_match("dpd/fdt/energy/kk",1);
|
||||
|
||||
if(pairDPDE){
|
||||
comm_forward = 3;
|
||||
|
@ -107,26 +110,12 @@ FixShardlow::FixShardlow(LAMMPS *lmp, int narg, char **arg) :
|
|||
if(pairDPD == NULL && pairDPDE == NULL)
|
||||
error->all(FLERR,"Must use pair_style dpd/fdt or dpd/fdt/energy with fix shardlow");
|
||||
|
||||
// Setup the ssaAIR array
|
||||
atom->ssaAIR = NULL;
|
||||
grow_arrays(atom->nmax);
|
||||
memset(atom->ssaAIR, 0, sizeof(int)*atom->nlocal);
|
||||
|
||||
// Setup callbacks for maintaining atom->ssaAIR[]
|
||||
atom->add_callback(0); // grow (aka exchange)
|
||||
atom->add_callback(1); // restart
|
||||
atom->add_callback(2); // border
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
FixShardlow::~FixShardlow()
|
||||
{
|
||||
atom->delete_callback(id, 0);
|
||||
atom->delete_callback(id, 1);
|
||||
atom->delete_callback(id, 2);
|
||||
|
||||
memory->destroy(atom->ssaAIR);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -135,7 +124,6 @@ int FixShardlow::setmask()
|
|||
{
|
||||
int mask = 0;
|
||||
mask |= INITIAL_INTEGRATE;
|
||||
mask |= PRE_EXCHANGE | MIN_PRE_EXCHANGE;
|
||||
return mask;
|
||||
}
|
||||
|
||||
|
@ -146,7 +134,9 @@ void FixShardlow::init()
|
|||
int irequest = neighbor->request(this,instance_me);
|
||||
neighbor->requests[irequest]->pair = 0;
|
||||
neighbor->requests[irequest]->fix = 1;
|
||||
neighbor->requests[irequest]->ghost = 1;
|
||||
neighbor->requests[irequest]->ssa = 1;
|
||||
neighbor->requests[irequest]->newton = 1; // SSA requires newton on
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -158,27 +148,6 @@ void FixShardlow::init_list(int id, NeighList *ptr)
|
|||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixShardlow::pre_exchange()
|
||||
{
|
||||
memset(atom->ssaAIR, 0, sizeof(int)*atom->nlocal);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixShardlow::setup_pre_exchange()
|
||||
{
|
||||
memset(atom->ssaAIR, 0, sizeof(int)*atom->nlocal);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixShardlow::min_pre_exchange()
|
||||
{
|
||||
memset(atom->ssaAIR, 0, sizeof(int)*atom->nlocal);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixShardlow::setup(int vflag)
|
||||
{
|
||||
bool fixShardlow = false;
|
||||
|
@ -243,6 +212,10 @@ void FixShardlow::ssa_update_dpd(
|
|||
const double mass_i = (rmass) ? rmass[i] : mass[itype];
|
||||
const double massinv_i = 1.0 / mass_i;
|
||||
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
const int nlocal = atom->nlocal;
|
||||
#endif
|
||||
|
||||
// Loop over Directional Neighbors only
|
||||
for (int jj = 0; jj < jlen; jj++) {
|
||||
int j = jlist[jj] & NEIGHMASK;
|
||||
|
@ -252,9 +225,23 @@ void FixShardlow::ssa_update_dpd(
|
|||
double dely = ytmp - x[j][1];
|
||||
double delz = ztmp - x[j][2];
|
||||
double rsq = delx*delx + dely*dely + delz*delz;
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
if ((i < nlocal) && (j < nlocal)) ++(counters[0][0]);
|
||||
else ++(counters[0][1]);
|
||||
++(counters[0][2]);
|
||||
int rsqi = rsq / 8;
|
||||
if (rsqi < 0) rsqi = 0;
|
||||
else if (rsqi > 31) rsqi = 31;
|
||||
++(hist[rsqi]);
|
||||
#endif
|
||||
|
||||
// NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test
|
||||
if ((rsq < cut2_i[jtype]) && (rsq >= EPSILON_SQUARED)) {
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
if ((i < nlocal) && (j < nlocal)) ++(counters[1][0]);
|
||||
else ++(counters[1][1]);
|
||||
++(counters[1][2]);
|
||||
#endif
|
||||
double r = sqrt(rsq);
|
||||
double rinv = 1.0/r;
|
||||
double delx_rinv = delx*rinv;
|
||||
|
@ -382,6 +369,10 @@ void FixShardlow::ssa_update_dpde(
|
|||
const double massinv_i = 1.0 / mass_i;
|
||||
const double mass_i_div_neg4_ftm2v = mass_i*(-0.25)/ftm2v;
|
||||
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
const int nlocal = atom->nlocal;
|
||||
#endif
|
||||
|
||||
// Loop over Directional Neighbors only
|
||||
for (int jj = 0; jj < jlen; jj++) {
|
||||
int j = jlist[jj] & NEIGHMASK;
|
||||
|
@ -391,9 +382,23 @@ void FixShardlow::ssa_update_dpde(
|
|||
double dely = ytmp - x[j][1];
|
||||
double delz = ztmp - x[j][2];
|
||||
double rsq = delx*delx + dely*dely + delz*delz;
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
if ((i < nlocal) && (j < nlocal)) ++(counters[0][0]);
|
||||
else ++(counters[0][1]);
|
||||
++(counters[0][2]);
|
||||
int rsqi = rsq / 8;
|
||||
if (rsqi < 0) rsqi = 0;
|
||||
else if (rsqi > 31) rsqi = 31;
|
||||
++(hist[rsqi]);
|
||||
#endif
|
||||
|
||||
// NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test
|
||||
if ((rsq < cut2_i[jtype]) && (rsq >= EPSILON_SQUARED)) {
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
if ((i < nlocal) && (j < nlocal)) ++(counters[1][0]);
|
||||
else ++(counters[1][1]);
|
||||
++(counters[1][2]);
|
||||
#endif
|
||||
double r = sqrt(rsq);
|
||||
double rinv = 1.0/r;
|
||||
double delx_rinv = delx*rinv;
|
||||
|
@ -518,7 +523,19 @@ void FixShardlow::initial_integrate(int vflag)
|
|||
error->all(FLERR,"Fix shardlow does not yet support triclinic geometries");
|
||||
|
||||
if(rcut >= bbx || rcut >= bby || rcut>= bbz )
|
||||
error->all(FLERR,"Shardlow algorithm requires sub-domain length > 2*(rcut+skin). Either reduce the number of processors requested, or change the cutoff/skin\n");
|
||||
{
|
||||
char fmt[] = {"Shardlow algorithm requires sub-domain length > 2*(rcut+skin). Either reduce the number of processors requested, or change the cutoff/skin: rcut= %e bbx= %e bby= %e bbz= %e\n"};
|
||||
char *msg = (char *) malloc(sizeof(fmt) + 4*15);
|
||||
sprintf(msg, fmt, rcut, bbx, bby, bbz);
|
||||
error->one(FLERR, msg);
|
||||
}
|
||||
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
for (int i = 0; i < 2; ++i)
|
||||
for (int j = 0; j < 3; ++j)
|
||||
counters[i][j] = 0;
|
||||
for (int i = 0; i < 32; ++i) hist[i] = 0;
|
||||
#endif
|
||||
|
||||
// Allocate memory for v_t0 to hold the initial velocities for the ghosts
|
||||
v_t0 = (double (*)[3]) memory->smalloc(sizeof(double)*3*nghost, "FixShardlow:v_t0");
|
||||
|
@ -528,10 +545,37 @@ void FixShardlow::initial_integrate(int vflag)
|
|||
|
||||
dtsqrt = sqrt(update->dt);
|
||||
|
||||
//Loop over all 14 directions (8 stages)
|
||||
for (airnum = 1; airnum <=8; airnum++){
|
||||
NPairHalfBinNewtonSSA *np_ssa = dynamic_cast<NPairHalfBinNewtonSSA*>(list->np);
|
||||
if (!np_ssa) error->one(FLERR, "NPair wasn't a NPairHalfBinNewtonSSA object");
|
||||
int ssa_phaseCt = np_ssa->ssa_phaseCt;
|
||||
int *ssa_phaseLen = np_ssa->ssa_phaseLen;
|
||||
int **ssa_itemLoc = np_ssa->ssa_itemLoc;
|
||||
int **ssa_itemLen = np_ssa->ssa_itemLen;
|
||||
|
||||
// process neighbors in the local AIR
|
||||
for (int workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) {
|
||||
int workItemCt = ssa_phaseLen[workPhase];
|
||||
|
||||
for (int workItem = 0; workItem < workItemCt; ++workItem) {
|
||||
int ct = ssa_itemLen[workPhase][workItem];
|
||||
ii = ssa_itemLoc[workPhase][workItem];
|
||||
|
||||
while (ct-- > 0) {
|
||||
int len = list->numneigh[ii];
|
||||
if (len > 0) {
|
||||
if (useDPDE) ssa_update_dpde(ilist[ii], list->firstneigh[ii], len);
|
||||
else ssa_update_dpd(ilist[ii], list->firstneigh[ii], len);
|
||||
}
|
||||
ii++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ii = inum;
|
||||
//Loop over all 13 outward directions (7 stages)
|
||||
for (airnum = 1; airnum <=7; airnum++){
|
||||
int ct = list->AIRct_ssa[airnum];
|
||||
|
||||
if (airnum > 1) {
|
||||
// Communicate the updated velocities to all nodes
|
||||
comm->forward_comm_fix(this);
|
||||
|
||||
|
@ -540,24 +584,30 @@ void FixShardlow::initial_integrate(int vflag)
|
|||
memset(&(atom->uCond[nlocal]), 0, sizeof(double)*nghost);
|
||||
memset(&(atom->uMech[nlocal]), 0, sizeof(double)*nghost);
|
||||
}
|
||||
}
|
||||
|
||||
// Loop over neighbors of my atoms
|
||||
for (ii = 0; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
int start = (airnum < 2) ? 0 : list->ndxAIR_ssa[i][airnum - 2];
|
||||
int len = list->ndxAIR_ssa[i][airnum - 1] - start;
|
||||
if (len > 0) {
|
||||
if (useDPDE) ssa_update_dpde(i, &(list->firstneigh[i][start]), len);
|
||||
else ssa_update_dpd(i, &(list->firstneigh[i][start]), len);
|
||||
}
|
||||
// process neighbors in this AIR
|
||||
while (ct-- > 0) {
|
||||
int len = list->numneigh[ii];
|
||||
if (useDPDE) ssa_update_dpde(ilist[ii], list->firstneigh[ii], len);
|
||||
else ssa_update_dpd(ilist[ii], list->firstneigh[ii], len);
|
||||
ii++;
|
||||
}
|
||||
|
||||
// Communicate the ghost deltas to the atom owners
|
||||
if (airnum > 1) comm->reverse_comm_fix(this);
|
||||
comm->reverse_comm_fix(this);
|
||||
|
||||
} //End Loop over all directions For airnum = Top, Top-Right, Right, Bottom-Right, Back
|
||||
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
for (int i = 0; i < 32; ++i) fprintf(stdout, "%8d", hist[i]);
|
||||
fprintf(stdout, "\n%6d %6d,%6d %6d: "
|
||||
,counters[0][2]
|
||||
,counters[1][2]
|
||||
,counters[0][1]
|
||||
,counters[1][1]
|
||||
);
|
||||
#endif
|
||||
|
||||
memory->sfree(v_t0);
|
||||
v_t0 = NULL;
|
||||
}
|
||||
|
@ -643,91 +693,11 @@ void FixShardlow::unpack_reverse_comm(int n, int *list, double *buf)
|
|||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
convert atom coords into the ssa active interaction region number
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int FixShardlow::coord2ssaAIR(double *x)
|
||||
{
|
||||
int ix, iy, iz;
|
||||
|
||||
ix = iy = iz = 0;
|
||||
if (x[2] < domain->sublo[2]) iz = -1;
|
||||
if (x[2] >= domain->subhi[2]) iz = 1;
|
||||
if (x[1] < domain->sublo[1]) iy = -1;
|
||||
if (x[1] >= domain->subhi[1]) iy = 1;
|
||||
if (x[0] < domain->sublo[0]) ix = -1;
|
||||
if (x[0] >= domain->subhi[0]) ix = 1;
|
||||
|
||||
if(iz < 0){
|
||||
return -1;
|
||||
} else if(iz == 0){
|
||||
if( iy<0 ) return -1; // bottom left/middle/right
|
||||
if( (iy==0) && (ix<0) ) return -1; // left atoms
|
||||
if( (iy==0) && (ix==0) ) return 0; // Locally owned atoms
|
||||
if( (iy==0) && (ix>0) ) return 3; // Right atoms
|
||||
if( (iy>0) && (ix==0) ) return 2; // Top-middle atoms
|
||||
if( (iy>0) && (ix!=0) ) return 4; // Top-right and top-left atoms
|
||||
} else { // iz > 0
|
||||
if((ix==0) && (iy==0)) return 5; // Back atoms
|
||||
if((ix==0) && (iy!=0)) return 6; // Top-back and bottom-back atoms
|
||||
if((ix!=0) && (iy==0)) return 7; // Left-back and right-back atoms
|
||||
if((ix!=0) && (iy!=0)) return 8; // Back corner atoms
|
||||
}
|
||||
|
||||
return -2;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixShardlow::grow_arrays(int nmax)
|
||||
{
|
||||
memory->grow(atom->ssaAIR,nmax,"fix_shardlow:ssaAIR");
|
||||
}
|
||||
|
||||
void FixShardlow::copy_arrays(int i, int j, int delflag)
|
||||
{
|
||||
atom->ssaAIR[j] = atom->ssaAIR[i];
|
||||
}
|
||||
|
||||
void FixShardlow::set_arrays(int i)
|
||||
{
|
||||
atom->ssaAIR[i] = 0; /* coord2ssaAIR(x[i]) */
|
||||
}
|
||||
|
||||
int FixShardlow::pack_border(int n, int *list, double *buf)
|
||||
{
|
||||
for (int i = 0; i < n; i++) {
|
||||
int j = list[i];
|
||||
if (atom->ssaAIR[j] == 0) atom->ssaAIR[j] = 1; // not purely local anymore
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int FixShardlow::unpack_border(int n, int first, double *buf)
|
||||
{
|
||||
int i,last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
atom->ssaAIR[i] = coord2ssaAIR(atom->x[i]);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int FixShardlow::unpack_exchange(int i, double *buf)
|
||||
{
|
||||
atom->ssaAIR[i] = 0; /* coord2ssaAIR(x[i]) */
|
||||
return 0;
|
||||
}
|
||||
|
||||
void FixShardlow::unpack_restart(int i, int nth)
|
||||
{
|
||||
atom->ssaAIR[i] = 0; /* coord2ssaAIR(x[i]) */
|
||||
}
|
||||
|
||||
double FixShardlow::memory_usage()
|
||||
{
|
||||
double bytes = 0.0;
|
||||
bytes += memory->usage(atom->ssaAIR,atom->nmax);
|
||||
bytes += sizeof(double)*3*atom->nghost; // v_t0[]
|
||||
return bytes;
|
||||
}
|
||||
|
|
|
@ -35,21 +35,14 @@ class FixShardlow : public Fix {
|
|||
virtual void init_list(int, class NeighList *);
|
||||
virtual void setup(int);
|
||||
virtual void initial_integrate(int);
|
||||
void setup_pre_exchange();
|
||||
void pre_exchange();
|
||||
void min_pre_exchange();
|
||||
|
||||
void grow_arrays(int);
|
||||
void copy_arrays(int, int, int);
|
||||
void set_arrays(int);
|
||||
|
||||
int pack_border(int, int *, double *);
|
||||
int unpack_border(int, int, double *);
|
||||
int unpack_exchange(int, double *);
|
||||
void unpack_restart(int, int);
|
||||
|
||||
double memory_usage();
|
||||
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
int counters[2][3];
|
||||
int hist[32];
|
||||
#endif
|
||||
|
||||
protected:
|
||||
int pack_reverse_comm(int, int, double *);
|
||||
void unpack_reverse_comm(int, int *, double *);
|
||||
|
@ -63,7 +56,6 @@ class FixShardlow : public Fix {
|
|||
private:
|
||||
double dtsqrt; // = sqrt(update->dt);
|
||||
|
||||
int coord2ssaAIR(double *); // map atom coord to an AIR number
|
||||
void ssa_update_dpd(int, int *, int); // Constant Temperature
|
||||
void ssa_update_dpde(int, int *, int); // Constant Energy
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "atom.h"
|
||||
#include "update.h"
|
||||
#include "group.h"
|
||||
#include "domain.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
||||
|
@ -29,24 +30,19 @@ using namespace LAMMPS_NS;
|
|||
|
||||
NBinSSA::NBinSSA(LAMMPS *lmp) : NBinStandard(lmp)
|
||||
{
|
||||
maxbin_ssa = 0;
|
||||
bins_ssa = NULL;
|
||||
maxhead_ssa = 0;
|
||||
binhead_ssa = NULL;
|
||||
gbinhead_ssa = NULL;
|
||||
for (int i = 0; i < 8; i++) {
|
||||
gairhead_ssa[i] = -1;
|
||||
}
|
||||
}
|
||||
|
||||
NBinSSA::~NBinSSA()
|
||||
{
|
||||
memory->destroy(bins_ssa);
|
||||
memory->destroy(binhead_ssa);
|
||||
memory->destroy(gbinhead_ssa);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
bin owned and ghost atoms for the Shardlow Splitting Algorithm (SSA)
|
||||
local atoms are in distinct bins (binhead_ssa) from the ghosts
|
||||
ghost atoms are in distinct bins (gbinhead_ssa) from the locals
|
||||
local atoms are in distinct bins (binhead[]) from the ghosts
|
||||
ghost atoms are "binned" in gairhead_ssa[] instead
|
||||
ghosts which are not in an Active Interaction Region (AIR) are skipped
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
|
@ -58,13 +54,19 @@ void NBinSSA::bin_atoms()
|
|||
if (includegroup) nlocal = atom->nfirst;
|
||||
double **x = atom->x;
|
||||
int *mask = atom->mask;
|
||||
int *ssaAIR = atom->ssaAIR;
|
||||
int xbin,ybin,zbin;
|
||||
|
||||
last_bin = update->ntimestep;
|
||||
|
||||
bboxlo_[0] = bboxlo[0]; bboxlo_[1] = bboxlo[1]; bboxlo_[2] = bboxlo[2];
|
||||
bboxhi_[0] = bboxhi[0]; bboxhi_[1] = bboxhi[1]; bboxhi_[2] = bboxhi[2];
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
gairhead_ssa[i] = -1;
|
||||
}
|
||||
|
||||
for (i = 0; i < mbins; i++) {
|
||||
gbinhead_ssa[i] = -1;
|
||||
binhead_ssa[i] = -1;
|
||||
binhead[i] = -1;
|
||||
}
|
||||
|
||||
// bin in reverse order so linked list will be in forward order
|
||||
|
@ -73,29 +75,34 @@ void NBinSSA::bin_atoms()
|
|||
int bitmask = group->bitmask[includegroup];
|
||||
int nowned = atom->nlocal; // NOTE: nlocal was set to atom->nfirst above
|
||||
for (i = nall-1; i >= nowned; i--) {
|
||||
if (ssaAIR[i] < 2) continue; // skip ghost atoms not in AIR
|
||||
ibin = coord2ssaAIR(x[i]);
|
||||
if (ibin < 1) continue; // skip ghost atoms not in AIR
|
||||
if (mask[i] & bitmask) {
|
||||
ibin = coord2bin(x[i]);
|
||||
atom2bin[i] = ibin;
|
||||
bins_ssa[i] = gbinhead_ssa[ibin];
|
||||
gbinhead_ssa[ibin] = i;
|
||||
bins[i] = gairhead_ssa[ibin];
|
||||
gairhead_ssa[ibin] = i;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (i = nall-1; i >= nlocal; i--) {
|
||||
if (ssaAIR[i] < 2) continue; // skip ghost atoms not in AIR
|
||||
ibin = coord2bin(x[i]);
|
||||
atom2bin[i] = ibin;
|
||||
bins_ssa[i] = gbinhead_ssa[ibin];
|
||||
gbinhead_ssa[ibin] = i;
|
||||
ibin = coord2ssaAIR(x[i]);
|
||||
if (ibin < 1) continue; // skip ghost atoms not in AIR
|
||||
bins[i] = gairhead_ssa[ibin];
|
||||
gairhead_ssa[ibin] = i;
|
||||
}
|
||||
}
|
||||
for (i = nlocal-1; i >= 0; i--) {
|
||||
ibin = coord2bin(x[i]);
|
||||
atom2bin[i] = ibin;
|
||||
bins_ssa[i] = binhead_ssa[ibin];
|
||||
binhead_ssa[ibin] = i;
|
||||
ibin = coord2bin(x[i][0], x[i][1], x[i][2], xbin, ybin, zbin);
|
||||
// Find the bounding box of the local atoms in the bins
|
||||
if (xbin < lbinxlo) lbinxlo = xbin;
|
||||
if (xbin >= lbinxhi) lbinxhi = xbin + 1;
|
||||
if (ybin < lbinylo) lbinylo = ybin;
|
||||
if (ybin >= lbinyhi) lbinyhi = ybin + 1;
|
||||
if (zbin < lbinzlo) lbinzlo = zbin;
|
||||
if (zbin >= lbinzhi) lbinzhi = zbin + 1;
|
||||
bins[i] = binhead[ibin];
|
||||
binhead[ibin] = i;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -104,19 +111,13 @@ void NBinSSA::bin_atoms_setup(int nall)
|
|||
{
|
||||
NBinStandard::bin_atoms_setup(nall); // Setup the parent class's data too
|
||||
|
||||
if (mbins > maxhead_ssa) {
|
||||
maxhead_ssa = mbins;
|
||||
memory->destroy(gbinhead_ssa);
|
||||
memory->destroy(binhead_ssa);
|
||||
memory->create(binhead_ssa,maxhead_ssa,"binhead_ssa");
|
||||
memory->create(gbinhead_ssa,maxhead_ssa,"gbinhead_ssa");
|
||||
}
|
||||
|
||||
if (nall > maxbin_ssa) {
|
||||
maxbin_ssa = nall;
|
||||
memory->destroy(bins_ssa);
|
||||
memory->create(bins_ssa,maxbin_ssa,"bins_ssa");
|
||||
}
|
||||
// Clear the local bin extent bounding box.
|
||||
lbinxlo = mbinx - 1; // Safe to = stencil->sx + 1
|
||||
lbinylo = mbiny - 1; // Safe to = stencil->sy + 1
|
||||
lbinzlo = mbinz - 1; // Safe to = stencil->sz + 1
|
||||
lbinxhi = 0; // Safe to = mbinx - stencil->sx - 1
|
||||
lbinyhi = 0; // Safe to = mbiny - stencil->sy - 1
|
||||
lbinzhi = 0; // Safe to = mbinz - stencil->sz - 1
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -125,10 +126,39 @@ bigint NBinSSA::memory_usage()
|
|||
{
|
||||
bigint bytes = NBinStandard::memory_usage(); // Count the parent's usage too
|
||||
|
||||
if (maxbin_ssa) bytes += memory->usage(bins_ssa,maxbin_ssa);
|
||||
if (maxhead_ssa) {
|
||||
bytes += memory->usage(binhead_ssa,maxhead_ssa);
|
||||
bytes += memory->usage(gbinhead_ssa,maxhead_ssa);
|
||||
}
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
convert atom coords into the ssa active interaction region number
|
||||
------------------------------------------------------------------------- */
|
||||
int NBinSSA::coord2ssaAIR(const double *x)
|
||||
{
|
||||
int ix, iy, iz;
|
||||
|
||||
ix = iy = iz = 0;
|
||||
if (x[2] < domain->sublo[2]) iz = -1;
|
||||
if (x[2] >= domain->subhi[2]) iz = 1;
|
||||
if (x[1] < domain->sublo[1]) iy = -1;
|
||||
if (x[1] >= domain->subhi[1]) iy = 1;
|
||||
if (x[0] < domain->sublo[0]) ix = -1;
|
||||
if (x[0] >= domain->subhi[0]) ix = 1;
|
||||
|
||||
if(iz < 0){
|
||||
return -1;
|
||||
} else if(iz == 0){
|
||||
if( iy<0 ) return -1; // bottom left/middle/right
|
||||
if( (iy==0) && (ix<0) ) return -1; // left atoms
|
||||
if( (iy==0) && (ix==0) ) return 0; // Locally owned atoms
|
||||
if( (iy==0) && (ix>0) ) return 2; // Right atoms
|
||||
if( (iy>0) && (ix==0) ) return 1; // Top-middle atoms
|
||||
if( (iy>0) && (ix!=0) ) return 3; // Top-right and top-left atoms
|
||||
} else { // iz > 0
|
||||
if((ix==0) && (iy==0)) return 4; // Back atoms
|
||||
if((ix==0) && (iy!=0)) return 5; // Top-back and bottom-back atoms
|
||||
if((ix!=0) && (iy==0)) return 6; // Left-back and right-back atoms
|
||||
if((ix!=0) && (iy!=0)) return 7; // Back corner atoms
|
||||
}
|
||||
|
||||
return -2;
|
||||
}
|
||||
|
|
|
@ -29,11 +29,15 @@ namespace LAMMPS_NS {
|
|||
class NBinSSA : public NBinStandard {
|
||||
public:
|
||||
|
||||
int *bins_ssa; // index of next atom in each bin
|
||||
int maxbin_ssa; // size of bins_ssa array
|
||||
int *binhead_ssa; // index of 1st local atom in each bin
|
||||
int *gbinhead_ssa; // index of 1st ghost atom in each bin
|
||||
int maxhead_ssa; // size of binhead_ssa and gbinhead_ssa arrays
|
||||
int gairhead_ssa[8]; // index of 1st ghost atom in each AIR
|
||||
|
||||
// Bounds of the local atoms in the binhead array
|
||||
int lbinxlo; // lowest local bin x-dim coordinate
|
||||
int lbinylo; // lowest local bin y-dim coordinate
|
||||
int lbinzlo; // lowest local bin z-dim coordinate
|
||||
int lbinxhi; // highest local bin x-dim coordinate
|
||||
int lbinyhi; // highest local bin y-dim coordinate
|
||||
int lbinzhi; // highest local bin z-dim coordinate
|
||||
|
||||
NBinSSA(class LAMMPS *);
|
||||
~NBinSSA();
|
||||
|
@ -42,6 +46,115 @@ class NBinSSA : public NBinStandard {
|
|||
void bin_atoms();
|
||||
|
||||
bigint memory_usage();
|
||||
|
||||
inline
|
||||
int coord2bin(const double & x,const double & y,const double & z) const
|
||||
{
|
||||
int ix,iy,iz;
|
||||
|
||||
if (x >= bboxhi_[0])
|
||||
ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx;
|
||||
else if (x >= bboxlo_[0]) {
|
||||
ix = static_cast<int> ((x-bboxlo_[0])*bininvx);
|
||||
ix = MIN(ix,nbinx-1);
|
||||
} else
|
||||
ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
|
||||
|
||||
if (y >= bboxhi_[1])
|
||||
iy = static_cast<int> ((y-bboxhi_[1])*bininvy) + nbiny;
|
||||
else if (y >= bboxlo_[1]) {
|
||||
iy = static_cast<int> ((y-bboxlo_[1])*bininvy);
|
||||
iy = MIN(iy,nbiny-1);
|
||||
} else
|
||||
iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
|
||||
|
||||
if (z >= bboxhi_[2])
|
||||
iz = static_cast<int> ((z-bboxhi_[2])*bininvz) + nbinz;
|
||||
else if (z >= bboxlo_[2]) {
|
||||
iz = static_cast<int> ((z-bboxlo_[2])*bininvz);
|
||||
iz = MIN(iz,nbinz-1);
|
||||
} else
|
||||
iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
|
||||
|
||||
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
|
||||
}
|
||||
|
||||
inline
|
||||
int coord2bin(const double & x,const double & y,const double & z, int* i) const
|
||||
{
|
||||
int ix,iy,iz;
|
||||
|
||||
if (x >= bboxhi_[0])
|
||||
ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx;
|
||||
else if (x >= bboxlo_[0]) {
|
||||
ix = static_cast<int> ((x-bboxlo_[0])*bininvx);
|
||||
ix = MIN(ix,nbinx-1);
|
||||
} else
|
||||
ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
|
||||
|
||||
if (y >= bboxhi_[1])
|
||||
iy = static_cast<int> ((y-bboxhi_[1])*bininvy) + nbiny;
|
||||
else if (y >= bboxlo_[1]) {
|
||||
iy = static_cast<int> ((y-bboxlo_[1])*bininvy);
|
||||
iy = MIN(iy,nbiny-1);
|
||||
} else
|
||||
iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
|
||||
|
||||
if (z >= bboxhi_[2])
|
||||
iz = static_cast<int> ((z-bboxhi_[2])*bininvz) + nbinz;
|
||||
else if (z >= bboxlo_[2]) {
|
||||
iz = static_cast<int> ((z-bboxlo_[2])*bininvz);
|
||||
iz = MIN(iz,nbinz-1);
|
||||
} else
|
||||
iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
|
||||
|
||||
i[0] = ix - mbinxlo;
|
||||
i[1] = iy - mbinylo;
|
||||
i[2] = iz - mbinzlo;
|
||||
|
||||
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
|
||||
}
|
||||
|
||||
inline
|
||||
int coord2bin(const double & x,const double & y,const double & z, int &ixo, int &iyo, int &izo) const
|
||||
{
|
||||
int ix,iy,iz;
|
||||
|
||||
if (x >= bboxhi_[0])
|
||||
ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx;
|
||||
else if (x >= bboxlo_[0]) {
|
||||
ix = static_cast<int> ((x-bboxlo_[0])*bininvx);
|
||||
ix = MIN(ix,nbinx-1);
|
||||
} else
|
||||
ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
|
||||
|
||||
if (y >= bboxhi_[1])
|
||||
iy = static_cast<int> ((y-bboxhi_[1])*bininvy) + nbiny;
|
||||
else if (y >= bboxlo_[1]) {
|
||||
iy = static_cast<int> ((y-bboxlo_[1])*bininvy);
|
||||
iy = MIN(iy,nbiny-1);
|
||||
} else
|
||||
iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
|
||||
|
||||
if (z >= bboxhi_[2])
|
||||
iz = static_cast<int> ((z-bboxhi_[2])*bininvz) + nbinz;
|
||||
else if (z >= bboxlo_[2]) {
|
||||
iz = static_cast<int> ((z-bboxlo_[2])*bininvz);
|
||||
iz = MIN(iz,nbinz-1);
|
||||
} else
|
||||
iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
|
||||
|
||||
ixo = ix - mbinxlo;
|
||||
iyo = iy - mbinylo;
|
||||
izo = iz - mbinzlo;
|
||||
|
||||
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
|
||||
}
|
||||
|
||||
private:
|
||||
int coord2ssaAIR(const double *); // map atom coord to an AIR number
|
||||
double bboxlo_[3],bboxhi_[3];
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -32,15 +32,29 @@
|
|||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
// allocate space for static class variable
|
||||
// prototype for non-class function
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
static int *ssaAIRptr;
|
||||
static int cmp_ssaAIR(const void *, const void *);
|
||||
NPairHalfBinNewtonSSA::NPairHalfBinNewtonSSA(LAMMPS *lmp) : NPair(lmp)
{
  // start with zeroed phase counts and capacities
  ssa_phaseCt = 0;
  ssa_maxPhaseCt = ssa_maxPhaseLen = 0;

  // the per-phase arrays start out as NULL pointers
  ssa_itemLen = NULL;
  ssa_itemLoc = NULL;
  ssa_phaseLen = NULL;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
NPairHalfBinNewtonSSA::NPairHalfBinNewtonSSA(LAMMPS *lmp) : NPair(lmp) {}
|
||||
NPairHalfBinNewtonSSA::~NPairHalfBinNewtonSSA()
{
  // release the per-phase arrays
  memory->destroy(ssa_itemLen);
  memory->destroy(ssa_itemLoc);
  memory->destroy(ssa_phaseLen);

  // reset the phase counts and capacities
  ssa_phaseCt = 0;
  ssa_maxPhaseLen = 0;
  ssa_maxPhaseCt = 0;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction with full Newton's 3rd law
|
||||
|
@ -65,7 +79,6 @@ void NPairHalfBinNewtonSSA::build(NeighList *list)
|
|||
int **nspecial = atom->nspecial;
|
||||
int nlocal = atom->nlocal;
|
||||
if (includegroup) nlocal = atom->nfirst;
|
||||
int *ssaAIR = atom->ssaAIR;
|
||||
|
||||
int *molindex = atom->molindex;
|
||||
int *molatom = atom->molatom;
|
||||
|
@ -81,26 +94,83 @@ void NPairHalfBinNewtonSSA::build(NeighList *list)
|
|||
|
||||
NStencilSSA *ns_ssa = dynamic_cast<NStencilSSA*>(ns);
|
||||
if (!ns_ssa) error->one(FLERR, "NStencil wasn't a NStencilSSA object");
|
||||
int nstencil_half = ns_ssa->nstencil_half;
|
||||
int *nstencil_ssa = &(ns_ssa->nstencil_ssa[0]);
|
||||
int nstencil_full = ns_ssa->nstencil;
|
||||
|
||||
NBinSSA *nb_ssa = dynamic_cast<NBinSSA*>(nb);
|
||||
if (!nb_ssa) error->one(FLERR, "NBin wasn't a NBinSSA object");
|
||||
int *bins_ssa = nb_ssa->bins_ssa;
|
||||
int *binhead_ssa = nb_ssa->binhead_ssa;
|
||||
int *gbinhead_ssa = nb_ssa->gbinhead_ssa;
|
||||
int *bins = nb_ssa->bins;
|
||||
int *binhead = nb_ssa->binhead;
|
||||
int *gairhead_ssa = &(nb_ssa->gairhead_ssa[0]);
|
||||
|
||||
int inum = 0;
|
||||
int gnum = 0;
|
||||
int xbin,ybin,zbin,xbin2,ybin2,zbin2;
|
||||
int **stencilxyz = ns_ssa->stencilxyz;
|
||||
int lbinxlo = nb_ssa->lbinxlo;
|
||||
int lbinxhi = nb_ssa->lbinxhi;
|
||||
int lbinylo = nb_ssa->lbinylo;
|
||||
int lbinyhi = nb_ssa->lbinyhi;
|
||||
int lbinzlo = nb_ssa->lbinzlo;
|
||||
int lbinzhi = nb_ssa->lbinzhi;
|
||||
|
||||
int sx1 = ns_ssa->sx + 1;
|
||||
int sy1 = ns_ssa->sy + 1;
|
||||
int sz1 = ns_ssa->sz + 1;
|
||||
|
||||
ssa_phaseCt = sz1*sy1*sx1;
|
||||
|
||||
xbin = (lbinxhi - lbinxlo + sx1 - 1) / sx1 + 1;
|
||||
ybin = (lbinyhi - lbinylo + sy1 - 1) / sy1 + 1;
|
||||
zbin = (lbinzhi - lbinzlo + sz1 - 1) / sz1 + 1;
|
||||
|
||||
int phaseLenEstimate = xbin*ybin*zbin;
|
||||
|
||||
if (ssa_phaseCt > ssa_maxPhaseCt) {
|
||||
ssa_maxPhaseCt = ssa_phaseCt;
|
||||
ssa_maxPhaseLen = 0;
|
||||
memory->destroy(ssa_phaseLen);
|
||||
memory->destroy(ssa_itemLoc);
|
||||
memory->destroy(ssa_itemLen);
|
||||
memory->create(ssa_phaseLen,ssa_maxPhaseCt,"NPairHalfBinNewtonSSA:ssa_phaseLen");
|
||||
}
|
||||
|
||||
if (phaseLenEstimate > ssa_maxPhaseLen) {
|
||||
ssa_maxPhaseLen = phaseLenEstimate;
|
||||
memory->destroy(ssa_itemLoc);
|
||||
memory->destroy(ssa_itemLen);
|
||||
memory->create(ssa_itemLoc,ssa_maxPhaseCt,ssa_maxPhaseLen,"NPairHalfBinNewtonSSA:ssa_itemLoc");
|
||||
memory->create(ssa_itemLen,ssa_maxPhaseCt,ssa_maxPhaseLen,"NPairHalfBinNewtonSSA:ssa_itemLen");
|
||||
}
|
||||
|
||||
ipage->reset();
|
||||
|
||||
// loop over owned atoms, storing half of the neighbors
|
||||
int workPhase = 0;
|
||||
// loop over bins with local atoms, storing half of the neighbors
|
||||
for (int zoff = ns_ssa->sz; zoff >= 0; --zoff) {
|
||||
for (int yoff = ns_ssa->sy; yoff >= 0; --yoff) {
|
||||
for (int xoff = ns_ssa->sx; xoff >= 0; --xoff) {
|
||||
int workItem = 0;
|
||||
for (zbin = lbinzlo + zoff; zbin < lbinzhi; zbin += sz1) {
|
||||
for (ybin = lbinylo + yoff - ns_ssa->sy; ybin < lbinyhi; ybin += sy1) {
|
||||
for (xbin = lbinxlo + xoff - ns_ssa->sx; xbin < lbinxhi; xbin += sx1) {
|
||||
if (workItem >= phaseLenEstimate) error->one(FLERR,"phaseLenEstimate was too small");
|
||||
ssa_itemLoc[workPhase][workItem] = inum; // record where workItem starts in ilist
|
||||
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
int AIRct[8] = { 0 };
|
||||
for (int subphase = 0; subphase < 4; subphase++) {
|
||||
int s_ybin = ybin + ((subphase & 0x2) ? ns_ssa->sy : 0);
|
||||
int s_xbin = xbin + ((subphase & 0x1) ? ns_ssa->sx : 0);
|
||||
int ibin, ct;
|
||||
|
||||
if ((s_ybin < lbinylo) || (s_ybin >= lbinyhi)) continue;
|
||||
if ((s_xbin < lbinxlo) || (s_xbin >= lbinxhi)) continue;
|
||||
ibin = zbin*nb_ssa->mbiny*nb_ssa->mbinx
|
||||
+ s_ybin*nb_ssa->mbinx
|
||||
+ s_xbin;
|
||||
|
||||
for (i = binhead[ibin]; i >= 0; i = bins[i]) {
|
||||
n = 0;
|
||||
neighptr = ipage->vget();
|
||||
|
||||
itype = type[i];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
|
@ -111,52 +181,18 @@ void NPairHalfBinNewtonSSA::build(NeighList *list)
|
|||
tagprev = tag[i] - iatom - 1;
|
||||
}
|
||||
|
||||
// loop over rest of local atoms in i's bin
|
||||
// just store them, since j is beyond i in linked list
|
||||
|
||||
for (j = bins_ssa[i]; j >= 0; j = bins_ssa[j]) {
|
||||
|
||||
// loop over all local atoms in the current stencil "subphase"
|
||||
for (k = nstencil_ssa[subphase]; k < nstencil_ssa[subphase+1]; k++) {
|
||||
const int jbin = ibin+stencil[k];
|
||||
if (jbin != ibin) j = binhead[jbin];
|
||||
else j = bins[i]; // same bin as i, so start just past i in the bin
|
||||
for (; j >= 0; j = bins[j]) {
|
||||
jtype = type[j];
|
||||
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq <= cutneighsq[itype][jtype]) {
|
||||
if (molecular) {
|
||||
if (!moltemplate)
|
||||
which = find_special(special[i],nspecial[i],tag[j]);
|
||||
else if (imol >= 0)
|
||||
which = find_special(onemols[imol]->special[iatom],
|
||||
onemols[imol]->nspecial[iatom],
|
||||
tag[j]-tagprev);
|
||||
else which = 0;
|
||||
if (which == 0) neighptr[n++] = j;
|
||||
else if (domain->minimum_image_check(delx,dely,delz))
|
||||
neighptr[n++] = j;
|
||||
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
|
||||
} else neighptr[n++] = j;
|
||||
}
|
||||
}
|
||||
|
||||
ibin = atom2bin[i];
|
||||
|
||||
// loop over all local atoms in other bins in "half" stencil
|
||||
|
||||
for (k = 0; k < nstencil_half; k++) {
|
||||
for (j = binhead_ssa[ibin+stencil[k]]; j >= 0;
|
||||
j = bins_ssa[j]) {
|
||||
|
||||
jtype = type[j];
|
||||
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq <= cutneighsq[itype][jtype]) {
|
||||
if (molecular) {
|
||||
if (!moltemplate)
|
||||
|
@ -174,86 +210,102 @@ void NPairHalfBinNewtonSSA::build(NeighList *list)
|
|||
}
|
||||
}
|
||||
}
|
||||
AIRct[0] = n;
|
||||
|
||||
// loop over AIR ghost atoms in all bins in "full" stencil
|
||||
// Note: the non-AIR ghost atoms have already been filtered out
|
||||
// That is a significant time savings because of the "full" stencil
|
||||
// Note2: only non-pure locals can have ghosts as neighbors
|
||||
|
||||
if (ssaAIR[i] == 1) for (k = 0; k < nstencil_full; k++) {
|
||||
for (j = gbinhead_ssa[ibin+stencil[k]]; j >= 0;
|
||||
j = bins_ssa[j]) {
|
||||
|
||||
jtype = type[j];
|
||||
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq <= cutneighsq[itype][jtype]) {
|
||||
if (molecular) {
|
||||
if (!moltemplate)
|
||||
which = find_special(special[i],nspecial[i],tag[j]);
|
||||
else if (imol >= 0)
|
||||
which = find_special(onemols[imol]->special[iatom],
|
||||
onemols[imol]->nspecial[iatom],
|
||||
tag[j]-tagprev);
|
||||
else which = 0;
|
||||
if (which == 0) {
|
||||
neighptr[n++] = j;
|
||||
++(AIRct[ssaAIR[j] - 1]);
|
||||
} else if (domain->minimum_image_check(delx,dely,delz)) {
|
||||
neighptr[n++] = j;
|
||||
++(AIRct[ssaAIR[j] - 1]);
|
||||
} else if (which > 0) {
|
||||
neighptr[n++] = j ^ (which << SBBITS);
|
||||
++(AIRct[ssaAIR[j] - 1]);
|
||||
}
|
||||
} else {
|
||||
neighptr[n++] = j;
|
||||
++(AIRct[ssaAIR[j] - 1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (n > 0) {
|
||||
firstneigh[inum] = neighptr;
|
||||
numneigh[inum] = n;
|
||||
ilist[inum++] = i;
|
||||
firstneigh[i] = neighptr;
|
||||
numneigh[i] = n;
|
||||
}
|
||||
ipage->vgot(n);
|
||||
if (ipage->status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
|
||||
// sort the ghosts in the neighbor list by their ssaAIR number
|
||||
|
||||
ssaAIRptr = atom->ssaAIR;
|
||||
qsort(&(neighptr[AIRct[0]]), n - AIRct[0], sizeof(int), cmp_ssaAIR);
|
||||
|
||||
// do a prefix sum on the counts to turn them into indexes
|
||||
|
||||
list->ndxAIR_ssa[i][0] = AIRct[0];
|
||||
for (int ndx = 1; ndx < 8; ++ndx) {
|
||||
list->ndxAIR_ssa[i][ndx] = AIRct[ndx] + list->ndxAIR_ssa[i][ndx - 1];
|
||||
}
|
||||
}
|
||||
// record where workItem ends in ilist
|
||||
ssa_itemLen[workPhase][workItem] = inum - ssa_itemLoc[workPhase][workItem];
|
||||
if (ssa_itemLen[workPhase][workItem] > 0) workItem++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
list->inum = inum;
|
||||
// record where workPhase ends
|
||||
ssa_phaseLen[workPhase++] = workItem;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
comparison function invoked by qsort()
|
||||
accesses the file-scope static pointer ssaAIRptr, set before call to qsort()
|
||||
------------------------------------------------------------------------- */
|
||||
if (ssa_phaseCt != workPhase) error->one(FLERR,"ssa_phaseCt was wrong");
|
||||
|
||||
static int cmp_ssaAIR(const void *iptr, const void *jptr)
|
||||
{
|
||||
int i = NEIGHMASK & *((int *) iptr);
|
||||
int j = NEIGHMASK & *((int *) jptr);
|
||||
if (ssaAIRptr[i] < ssaAIRptr[j]) return -1;
|
||||
if (ssaAIRptr[i] > ssaAIRptr[j]) return 1;
|
||||
return 0;
|
||||
list->AIRct_ssa[0] = list->inum = inum;
|
||||
|
||||
// loop over AIR ghost atoms, storing their local neighbors
|
||||
// since these are ghosts, must check if stencil bin is out of bounds
|
||||
for (int airnum = 1; airnum <= 7; airnum++) {
|
||||
int locAIRct = 0;
|
||||
for (i = gairhead_ssa[airnum]; i >= 0; i = bins[i]) {
|
||||
n = 0;
|
||||
neighptr = ipage->vget();
|
||||
|
||||
itype = type[i];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
|
||||
ibin = coord2bin(x[i],xbin,ybin,zbin);
|
||||
|
||||
// loop over AIR ghost atoms in all bins in "full" stencil
|
||||
// Note: the non-AIR ghost atoms have already been filtered out
|
||||
for (k = 0; k < nstencil_full; k++) {
|
||||
xbin2 = xbin + stencilxyz[k][0];
|
||||
ybin2 = ybin + stencilxyz[k][1];
|
||||
zbin2 = zbin + stencilxyz[k][2];
|
||||
// Skip it if this bin is outside the extent of local bins
|
||||
if (xbin2 < lbinxlo || xbin2 >= lbinxhi ||
|
||||
ybin2 < lbinylo || ybin2 >= lbinyhi ||
|
||||
zbin2 < lbinzlo || zbin2 >= lbinzhi) continue;
|
||||
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
|
||||
|
||||
jtype = type[j];
|
||||
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq <= cutneighsq[itype][jtype]) {
|
||||
if (molecular) {
|
||||
if (!moltemplate)
|
||||
which = find_special(special[j],nspecial[j],tag[i]);
|
||||
else {
|
||||
int jmol = molindex[j];
|
||||
if (jmol >= 0) {
|
||||
int jatom = molatom[j];
|
||||
which = find_special(onemols[jmol]->special[jatom],
|
||||
onemols[jmol]->nspecial[jatom],
|
||||
tag[i] - (tag[j] - jatom - 1));
|
||||
} else which = 0;
|
||||
}
|
||||
if (which == 0) neighptr[n++] = j;
|
||||
else if (domain->minimum_image_check(delx,dely,delz))
|
||||
neighptr[n++] = j;
|
||||
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
|
||||
} else neighptr[n++] = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (n > 0) {
|
||||
firstneigh[inum + gnum] = neighptr;
|
||||
numneigh[inum + gnum] = n;
|
||||
ilist[inum + (gnum++)] = i;
|
||||
++locAIRct;
|
||||
}
|
||||
ipage->vgot(n);
|
||||
if (ipage->status())
|
||||
error->one(FLERR,"Neighbor (ghost) list overflow, boost neigh_modify one");
|
||||
}
|
||||
list->AIRct_ssa[airnum] = locAIRct;
|
||||
}
|
||||
list->gnum = gnum;
|
||||
}
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
|
||||
NPairStyle(half/bin/newton/ssa,
|
||||
NPairHalfBinNewtonSSA,
|
||||
NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_SSA)
|
||||
NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_SSA | NP_GHOST)
|
||||
|
||||
#else
|
||||
|
||||
|
@ -28,9 +28,18 @@ namespace LAMMPS_NS {
|
|||
|
||||
class NPairHalfBinNewtonSSA : public NPair {
|
||||
public:
|
||||
// SSA Work plan data structures
|
||||
int ssa_phaseCt;
|
||||
int *ssa_phaseLen;
|
||||
int **ssa_itemLoc;
|
||||
int **ssa_itemLen;
|
||||
|
||||
NPairHalfBinNewtonSSA(class LAMMPS *);
|
||||
~NPairHalfBinNewtonSSA() {}
|
||||
~NPairHalfBinNewtonSSA();
|
||||
void build(class NeighList *);
|
||||
private:
|
||||
int ssa_maxPhaseCt;
|
||||
int ssa_maxPhaseLen;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -1,132 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors:
|
||||
James Larentzos and Timothy I. Mattox (Engility Corporation)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_halffull_newton_ssa.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "molecule.h"
|
||||
#include "domain.h"
|
||||
#include "my_page.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
// file-scope static pointer read by the qsort() comparison function
|
||||
// prototype for non-class function
|
||||
|
||||
static int *ssaAIRptr;
|
||||
static int cmp_ssaAIR(const void *, const void *);
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Constructor: forwards the LAMMPS instance pointer to the NPair base class;
// the body is empty, so this class sets up no state of its own here.
NPairHalffullNewtonSSA::NPairHalffullNewtonSSA(LAMMPS *lmp) : NPair(lmp) {}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
build half list from full list for use by Shardlow Splitting Algorithm
|
||||
pair stored once if i,j are both owned and i < j
|
||||
if j is ghost, only store if j coords are "above and to the right" of i
|
||||
works if full list is a skip list
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void NPairHalffullNewtonSSA::build(NeighList *list)
{
  // Atoms with index below nlocal are treated as owned; the rest as ghosts.
  const int nlocal = atom->nlocal;
  int *ssaAIR = atom->ssaAIR;

  // Output (half) list storage.
  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;
  MyPage<int> *ipage = list->ipage;

  // Parent (full) list that is being filtered.
  int *ilist_full = list->listfull->ilist;
  int *numneigh_full = list->listfull->numneigh;
  int **firstneigh_full = list->listfull->firstneigh;
  const int inum_full = list->listfull->inum;

  int inum = 0;
  ipage->reset();

  // Visit every atom of the parent full list, in order.
  for (int ii = 0; ii < inum_full; ++ii) {
    // AIRct[0] counts kept owned neighbors; AIRct[k] counts kept ghosts
    // whose ssaAIR value is k+1.
    int AIRct[8] = { 0 };
    int n = 0;
    int *neighptr = ipage->vget();

    const int i = ilist_full[ii];
    const int *jlist = firstneigh_full[i];
    const int jnum = numneigh_full[i];

    // Filter the full neighbor list of atom i.
    for (int jj = 0; jj < jnum; ++jj) {
      const int joriginal = jlist[jj];
      const int j = joriginal & NEIGHMASK;

      int slot;
      if (j < nlocal) {
        // owned neighbor: dropped when i > j
        if (i > j) continue;
        slot = 0;
      } else {
        // ghost neighbor: skip ghost atoms not in AIR
        const int air = ssaAIR[j];
        if (air < 2) continue;
        slot = air - 1;
      }

      ++(AIRct[slot]);
      neighptr[n++] = joriginal;   // keep the unmasked entry
    }

    // Record atom i in the output list, then check the page for overflow.
    ilist[inum++] = i;
    firstneigh[i] = neighptr;
    numneigh[i] = n;
    ipage->vgot(n);
    if (ipage->status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");

    // Sort the kept locals+ghosts by their ssaAIR number; the comparison
    // function reads ssaAIRptr, so it must be set before qsort() runs.
    ssaAIRptr = atom->ssaAIR;
    qsort(neighptr, n, sizeof(int), cmp_ssaAIR);

    // Prefix-sum the per-slot counts so they become indexes into the
    // sorted neighbor list.
    list->ndxAIR_ssa[i][0] = AIRct[0];
    for (int ndx = 1; ndx < 8; ++ndx) {
      list->ndxAIR_ssa[i][ndx] = AIRct[ndx] + list->ndxAIR_ssa[i][ndx - 1];
    }
  }

  list->inum = inum;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
comparison function invoked by qsort()
|
||||
accesses the file-scope static pointer ssaAIRptr, set before call to qsort()
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static int cmp_ssaAIR(const void *iptr, const void *jptr)
|
||||
{
|
||||
int i = NEIGHMASK & *((int *) iptr);
|
||||
int j = NEIGHMASK & *((int *) jptr);
|
||||
if (ssaAIRptr[i] < ssaAIRptr[j]) return -1;
|
||||
if (ssaAIRptr[i] > ssaAIRptr[j]) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -42,23 +42,72 @@ NStencilHalfBin2dNewtonSSA::NStencilHalfBin2dNewtonSSA(LAMMPS *lmp) :
|
|||
void NStencilHalfBin2dNewtonSSA::create()
|
||||
{
|
||||
int i,j,pos = 0;
|
||||
nstencil_ssa[0] = 0; // redundant info, but saves a conditional
|
||||
|
||||
// Include the centroid at the start.
|
||||
// It will be handled as part of Subphase 0.
|
||||
stencilxyz[pos][0] = 0;
|
||||
stencilxyz[pos][1] = 0;
|
||||
stencilxyz[pos][2] = 0;
|
||||
stencil[pos++] = 0;
|
||||
|
||||
// Subphase 0: upper right front bins (red)
|
||||
for (j = 0; j <= sy; j++)
|
||||
for (i = -sx; i <= sx; i++)
|
||||
if (j > 0 || (j == 0 && i > 0))
|
||||
if (bin_distance(i,j,0) < cutneighmaxsq)
|
||||
stencil[pos++] = j*mbinx + i;
|
||||
|
||||
nstencil_half = pos; // record where normal half stencil ends
|
||||
|
||||
// include additional bins for AIR ghosts only
|
||||
|
||||
for (j = -sy; j <= 0; j++)
|
||||
for (i = -sx; i <= sx; i++) {
|
||||
if (j == 0 && i > 0) continue;
|
||||
if (bin_distance(i,j,0) < cutneighmaxsq)
|
||||
for (i = 0; i <= sx; i++)
|
||||
if (j > 0 || i > 0) // skip the centroid
|
||||
if (bin_distance(i,j,0) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = 0;
|
||||
stencil[pos++] = j*mbinx + i;
|
||||
}
|
||||
|
||||
nstencil_ssa[1] = pos;
|
||||
// Subphase 1: upper left front bins (light blue)
|
||||
for (j = 1; j <= sy; j++)
|
||||
for (i = -sx; i < 0; i++)
|
||||
if (bin_distance(i,j,0) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = 0;
|
||||
stencil[pos++] = j*mbinx + i;
|
||||
}
|
||||
|
||||
nstencil_ssa[2] = pos;
|
||||
// Subphase 2: lower right front bins (yellow)
|
||||
|
||||
nstencil_ssa[3] = pos;
|
||||
// Subphase 3: lower left front bins (blue)
|
||||
|
||||
nstencil_ssa[4] = pos; // record end of half stencil
|
||||
// Now include additional bins for AIR ghosts, and impure-to-pure locals
|
||||
// Subphase 4: upper right back bins (pink)
|
||||
|
||||
// nstencil_ssa[5] = pos;
|
||||
// Subphase 5: upper left back bins (light green)
|
||||
|
||||
// nstencil_ssa[6] = pos;
|
||||
// Subphase 6: lower right back bins (white)
|
||||
for (j = -sy; j < 0; j++)
|
||||
for (i = 0; i <= sx; i++)
|
||||
if (bin_distance(i,j,0) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = 0;
|
||||
stencil[pos++] = j*mbinx + i;
|
||||
}
|
||||
|
||||
// nstencil_ssa[7] = pos;
|
||||
// Subphase 7: lower left back bins (purple)
|
||||
for (j = -sy; j <= 0; j++)
|
||||
for (i = -sx; i < 0; i++)
|
||||
if (bin_distance(i,j,0) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = 0;
|
||||
stencil[pos++] = j*mbinx + i;
|
||||
}
|
||||
// nstencil_ssa[8] = pos;
|
||||
|
||||
nstencil = pos; // record where full stencil ends
|
||||
}
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
|
||||
NStencilStyle(half/bin/2d/newton/ssa,
|
||||
NStencilHalfBin2dNewtonSSA,
|
||||
NS_HALF | NS_BIN | NS_2D | NS_NEWTON | NS_SSA | NS_ORTHO)
|
||||
NS_HALF | NS_BIN | NS_2D | NS_NEWTON | NS_SSA | NS_ORTHO | NS_GHOST)
|
||||
|
||||
#else
|
||||
|
||||
|
|
|
@ -42,33 +42,112 @@ NStencilHalfBin3dNewtonSSA::NStencilHalfBin3dNewtonSSA(LAMMPS *lmp) :
|
|||
void NStencilHalfBin3dNewtonSSA::create()
|
||||
{
|
||||
int i,j,k,pos = 0;
|
||||
nstencil_ssa[0] = 0; // redundant info, but saves a conditional
|
||||
|
||||
// Include the centroid at the start.
|
||||
// It will be handled as part of Subphase 0.
|
||||
stencilxyz[pos][0] = 0;
|
||||
stencilxyz[pos][1] = 0;
|
||||
stencilxyz[pos][2] = 0;
|
||||
stencil[pos++] = 0;
|
||||
|
||||
// Subphase 0: upper right front bins (red)
|
||||
for (k = 0; k <= sz; k++)
|
||||
for (j = -sy; j <= sy; j++)
|
||||
for (i = -sx; i <= sx; i++)
|
||||
if (k > 0 || j > 0 || (j == 0 && i > 0))
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq)
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
|
||||
nstencil_half = pos; // record where normal half stencil ends
|
||||
|
||||
// include additional bins for AIR ghosts only
|
||||
|
||||
for (k = -sz; k < 0; k++)
|
||||
for (j = -sy; j <= sy; j++)
|
||||
for (i = -sx; i <= sx; i++)
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq)
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
|
||||
// For k==0, make sure to skip already included bins
|
||||
|
||||
k = 0;
|
||||
for (j = -sy; j <= 0; j++)
|
||||
for (i = -sx; i <= sx; i++) {
|
||||
if (j == 0 && i > 0) continue;
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq)
|
||||
for (j = 0; j <= sy; j++)
|
||||
for (i = 0; i <= sx; i++)
|
||||
if (k > 0 || j > 0 || i > 0) // skip the centroid
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = k;
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
}
|
||||
|
||||
nstencil_ssa[1] = pos;
|
||||
// Subphase 1: upper left front bins (light blue)
|
||||
for (k = 0; k <= sz; k++)
|
||||
for (j = 1; j <= sy; j++)
|
||||
for (i = -sx; i < 0; i++)
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = k;
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
}
|
||||
|
||||
nstencil_ssa[2] = pos;
|
||||
// Subphase 2: lower right front bins (yellow)
|
||||
for (k = 1; k <= sz; k++)
|
||||
for (j = -sy; j < 0; j++)
|
||||
for (i = 0; i <= sx; i++)
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = k;
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
}
|
||||
|
||||
nstencil_ssa[3] = pos;
|
||||
// Subphase 3: lower left front bins (blue)
|
||||
for (k = 1; k <= sz; k++)
|
||||
for (j = -sy; j <= 0; j++)
|
||||
for (i = -sx; i < 0; i++)
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = k;
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
}
|
||||
|
||||
nstencil_ssa[4] = pos; // record end of half stencil
|
||||
// Now include additional bins for AIR ghosts, and impure-to-pure locals
|
||||
// Subphase 4: upper right back bins (pink)
|
||||
for (k = -sz; k < 0; k++)
|
||||
for (j = 0; j <= sy; j++)
|
||||
for (i = 0; i <= sx; i++)
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = k;
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
}
|
||||
|
||||
// nstencil_ssa[5] = pos;
|
||||
// Subphase 5: upper left back bins (light green)
|
||||
for (k = -sz; k < 0; k++)
|
||||
for (j = 1; j <= sy; j++)
|
||||
for (i = -sx; i < 0; i++)
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = k;
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
}
|
||||
|
||||
// nstencil_ssa[6] = pos;
|
||||
// Subphase 6: lower right back bins (white)
|
||||
for (k = -sz; k <= 0; k++)
|
||||
for (j = -sy; j < 0; j++)
|
||||
for (i = 0; i <= sx; i++)
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = k;
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
}
|
||||
|
||||
// nstencil_ssa[7] = pos;
|
||||
// Subphase 7: lower left back bins (purple)
|
||||
for (k = -sz; k <= 0; k++)
|
||||
for (j = -sy; j <= 0; j++)
|
||||
for (i = -sx; i < 0; i++)
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq) {
|
||||
stencilxyz[pos][0] = i;
|
||||
stencilxyz[pos][1] = j;
|
||||
stencilxyz[pos][2] = k;
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
}
|
||||
//nstencil_ssa[8] = pos;
|
||||
|
||||
nstencil = pos; // record where full stencil ends
|
||||
}
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
|
||||
NStencilStyle(half/bin/3d/newton/ssa,
|
||||
NStencilHalfBin3dNewtonSSA,
|
||||
NS_HALF | NS_BIN | NS_3D | NS_NEWTON | NS_SSA | NS_ORTHO)
|
||||
NS_HALF | NS_BIN | NS_3D | NS_NEWTON | NS_SSA | NS_ORTHO | NS_GHOST)
|
||||
|
||||
#else
|
||||
|
||||
|
|
|
@ -20,11 +20,12 @@ namespace LAMMPS_NS {
|
|||
|
||||
class NStencilSSA : public NStencil {
|
||||
public:
|
||||
NStencilSSA(class LAMMPS *lmp) : NStencil(lmp) { }
|
||||
NStencilSSA(class LAMMPS *lmp) : NStencil(lmp) { xyzflag = 1; }
|
||||
~NStencilSSA() {}
|
||||
virtual void create() = 0;
|
||||
|
||||
int nstencil_half; // where the half stencil ends
|
||||
// first stencil index for each subphase, with last index at end
|
||||
int nstencil_ssa[5];
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -316,18 +316,17 @@ void PairDPDfdt::init_style()
|
|||
if (comm->ghost_velocity == 0)
|
||||
error->all(FLERR,"Pair dpd/fdt requires ghost atoms store velocity");
|
||||
|
||||
// if newton off, forces between atoms ij will be double computed
|
||||
// using different random numbers
|
||||
|
||||
if (force->newton_pair == 0 && comm->me == 0) error->warning(FLERR,
|
||||
"Pair dpd/fdt requires newton pair on");
|
||||
|
||||
splitFDT_flag = false;
|
||||
int irequest = neighbor->request(this,instance_me);
|
||||
for (int i = 0; i < modify->nfix; i++)
|
||||
if (strcmp(modify->fix[i]->style,"shardlow") == 0){
|
||||
if (strncmp(modify->fix[i]->style,"shardlow", 8) == 0){
|
||||
splitFDT_flag = true;
|
||||
}
|
||||
|
||||
// if newton off, forces between atoms ij will be double computed
|
||||
// using different random numbers if splitFDT_flag is false
|
||||
if (!splitFDT_flag && (force->newton_pair == 0) && (comm->me == 0)) error->warning(FLERR,
|
||||
"Pair dpd/fdt requires newton pair on if not also using fix shardlow");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
|
|
@ -55,6 +55,8 @@ PairDPDfdtEnergy::PairDPDfdtEnergy(LAMMPS *lmp) : Pair(lmp)
|
|||
|
||||
PairDPDfdtEnergy::~PairDPDfdtEnergy()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
if (allocated) {
|
||||
memory->destroy(setflag);
|
||||
memory->destroy(cutsq);
|
||||
|
@ -403,19 +405,18 @@ void PairDPDfdtEnergy::init_style()
|
|||
if (comm->ghost_velocity == 0)
|
||||
error->all(FLERR,"Pair dpd/fdt/energy requires ghost atoms store velocity");
|
||||
|
||||
// if newton off, forces between atoms ij will be double computed
|
||||
// using different random numbers
|
||||
|
||||
if (force->newton_pair == 0 && comm->me == 0) error->warning(FLERR,
|
||||
"Pair dpd/fdt/energy requires newton pair on");
|
||||
|
||||
splitFDT_flag = false;
|
||||
int irequest = neighbor->request(this,instance_me);
|
||||
for (int i = 0; i < modify->nfix; i++)
|
||||
if (strcmp(modify->fix[i]->style,"shardlow") == 0){
|
||||
if (strncmp(modify->fix[i]->style,"shardlow", 8) == 0){
|
||||
splitFDT_flag = true;
|
||||
}
|
||||
|
||||
// if newton off, forces between atoms ij will be double computed
|
||||
// using different random numbers if splitFDT_flag is false
|
||||
if (!splitFDT_flag && (force->newton_pair == 0) && (comm->me == 0)) error->warning(FLERR,
|
||||
"Pair dpd/fdt/energy requires newton pair on if not also using fix shardlow");
|
||||
|
||||
bool eos_flag = false;
|
||||
for (int i = 0; i < modify->nfix; i++)
|
||||
if (strncmp(modify->fix[i]->style,"eos",3) == 0) eos_flag = true;
|
||||
|
|
|
@ -31,8 +31,8 @@ class PairDPDfdtEnergy : public Pair {
|
|||
virtual void compute(int, int);
|
||||
virtual void settings(int, char **);
|
||||
virtual void coeff(int, char **);
|
||||
void init_style();
|
||||
double init_one(int, int);
|
||||
virtual void init_style();
|
||||
virtual double init_one(int, int);
|
||||
void write_restart(FILE *);
|
||||
void read_restart(FILE *);
|
||||
virtual void write_restart_settings(FILE *);
|
||||
|
@ -46,15 +46,15 @@ class PairDPDfdtEnergy : public Pair {
|
|||
double **sigma,**kappa;
|
||||
double *duCond,*duMech;
|
||||
|
||||
int seed;
|
||||
class RanMars *random;
|
||||
|
||||
protected:
|
||||
double cut_global;
|
||||
int seed;
|
||||
bool splitFDT_flag;
|
||||
bool a0_is_zero;
|
||||
|
||||
void allocate();
|
||||
virtual void allocate();
|
||||
|
||||
};
|
||||
|
||||
|
|
|
@ -84,11 +84,15 @@ PairExp6rx::PairExp6rx(LAMMPS *lmp) : Pair(lmp)
|
|||
|
||||
PairExp6rx::~PairExp6rx()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
if (params != NULL) {
|
||||
for (int i=0; i < nparams; ++i) {
|
||||
delete[] params[i].name;
|
||||
delete[] params[i].potential;
|
||||
}
|
||||
memory->destroy(params);
|
||||
}
|
||||
memory->destroy(mol2param);
|
||||
|
||||
if (allocated) {
|
||||
|
|
|
@ -30,13 +30,21 @@ class PairExp6rx : public Pair {
|
|||
virtual ~PairExp6rx();
|
||||
virtual void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
virtual void coeff(int, char **);
|
||||
double init_one(int, int);
|
||||
void write_restart(FILE *);
|
||||
void read_restart(FILE *);
|
||||
void write_restart_settings(FILE *);
|
||||
void read_restart_settings(FILE *);
|
||||
|
||||
struct Param {
|
||||
double epsilon,rm,alpha;
|
||||
int ispecies;
|
||||
char *name, *potential; // names of unique molecules and interaction type
|
||||
char *tablename; // name of interaction table
|
||||
int potentialType; // enumerated interaction potential type.
|
||||
};
|
||||
|
||||
protected:
|
||||
enum{LINEAR};
|
||||
enum{NONE,EXPONENT,POLYNOMIAL};
|
||||
|
@ -45,21 +53,14 @@ class PairExp6rx : public Pair {
|
|||
double **epsilon,**rm,**alpha;
|
||||
double **rminv,**buck1,**buck2,**offset;
|
||||
|
||||
void allocate();
|
||||
virtual void allocate();
|
||||
int *mol2param; // mapping from molecule to parameters
|
||||
int nparams; // # of stored parameter sets
|
||||
int maxparam; // max # of parameter sets
|
||||
struct Param {
|
||||
double epsilon,rm,alpha;
|
||||
int ispecies;
|
||||
char *name, *potential; // names of unique molecules and interaction type
|
||||
char *tablename; // name of interaction table
|
||||
int potentialType; // enumerated interaction potential type.
|
||||
};
|
||||
Param *params; // parameter set for an I-J-K interaction
|
||||
|
||||
int nspecies;
|
||||
void read_file(char *);
|
||||
virtual void read_file(char *);
|
||||
void read_file2(char *);
|
||||
void setup();
|
||||
|
||||
|
|
|
@ -85,6 +85,8 @@ PairMultiLucyRX::PairMultiLucyRX(LAMMPS *lmp) : Pair(lmp),
|
|||
|
||||
PairMultiLucyRX::~PairMultiLucyRX()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
for (int m = 0; m < ntables; m++) free_table(&tables[m]);
|
||||
memory->sfree(tables);
|
||||
|
||||
|
|
|
@ -30,17 +30,17 @@ class PairMultiLucyRX : public Pair {
|
|||
virtual ~PairMultiLucyRX();
|
||||
|
||||
virtual void compute(int, int);
|
||||
void settings(int, char **);
|
||||
virtual void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
double init_one(int, int);
|
||||
void write_restart(FILE *);
|
||||
void read_restart(FILE *);
|
||||
void write_restart_settings(FILE *);
|
||||
void read_restart_settings(FILE *);
|
||||
int pack_forward_comm(int, int *, double *, int, int *);
|
||||
void unpack_forward_comm(int, int, double *);
|
||||
int pack_reverse_comm(int, int, double *);
|
||||
void unpack_reverse_comm(int, int *, double *);
|
||||
virtual int pack_forward_comm(int, int *, double *, int, int *);
|
||||
virtual void unpack_forward_comm(int, int, double *);
|
||||
virtual int pack_reverse_comm(int, int, double *);
|
||||
virtual void unpack_reverse_comm(int, int *, double *);
|
||||
void computeLocalDensity();
|
||||
double rho_0;
|
||||
|
||||
|
@ -64,7 +64,7 @@ class PairMultiLucyRX : public Pair {
|
|||
|
||||
int **tabindex;
|
||||
|
||||
void allocate();
|
||||
virtual void allocate();
|
||||
void read_table(Table *, char *, char *);
|
||||
void param_extract(Table *, char *);
|
||||
void bcast_table(Table *);
|
||||
|
|
|
@ -33,8 +33,6 @@ using namespace LAMMPS_NS;
|
|||
|
||||
enum{NONE,RLINEAR,RSQ,BMP};
|
||||
|
||||
#define MAXLINE 1024
|
||||
|
||||
#ifdef DBL_EPSILON
|
||||
#define MY_EPSILON (10.0*DBL_EPSILON)
|
||||
#else
|
||||
|
@ -46,25 +44,19 @@ enum{NONE,RLINEAR,RSQ,BMP};
|
|||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairTableRX::PairTableRX(LAMMPS *lmp) : Pair(lmp)
|
||||
PairTableRX::PairTableRX(LAMMPS *lmp) : PairTable(lmp)
|
||||
{
|
||||
ntables = 0;
|
||||
tables = NULL;
|
||||
fractionalWeighting = true;
|
||||
site1 = NULL;
|
||||
site2 = NULL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairTableRX::~PairTableRX()
|
||||
{
|
||||
for (int m = 0; m < ntables; m++) free_table(&tables[m]);
|
||||
memory->sfree(tables);
|
||||
|
||||
if (allocated) {
|
||||
memory->destroy(setflag);
|
||||
memory->destroy(cutsq);
|
||||
memory->destroy(tabindex);
|
||||
}
|
||||
delete [] site1;
|
||||
delete [] site2;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -252,24 +244,6 @@ void PairTableRX::compute(int eflag, int vflag)
|
|||
memory->destroy(mixWtSite2);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
allocate all arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairTableRX::allocate()
|
||||
{
|
||||
allocated = 1;
|
||||
const int nt = atom->ntypes + 1;
|
||||
|
||||
memory->create(setflag,nt,nt,"pair:setflag");
|
||||
memory->create(cutsq,nt,nt,"pair:cutsq");
|
||||
memory->create(tabindex,nt,nt,"pair:tabindex");
|
||||
|
||||
memset(&setflag[0][0],0,nt*nt*sizeof(int));
|
||||
memset(&cutsq[0][0],0,nt*nt*sizeof(double));
|
||||
memset(&tabindex[0][0],0,nt*nt*sizeof(int));
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
global settings
|
||||
------------------------------------------------------------------------- */
|
||||
|
@ -462,602 +436,6 @@ void PairTableRX::coeff(int narg, char **arg)
|
|||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init for one type pair i,j and corresponding j,i
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
double PairTableRX::init_one(int i, int j)
|
||||
{
|
||||
if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
|
||||
|
||||
tabindex[j][i] = tabindex[i][j];
|
||||
|
||||
return tables[tabindex[i][j]].cut;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
read a table section from a tabulated potential file
|
||||
only called by proc 0
|
||||
this function sets these values in Table:
|
||||
ninput,rfile,efile,ffile,rflag,rlo,rhi,fpflag,fplo,fphi,ntablebits
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairTableRX::read_table(Table *tb, char *file, char *keyword)
|
||||
{
|
||||
char line[MAXLINE];
|
||||
|
||||
// open file
|
||||
|
||||
FILE *fp = force->open_potential(file);
|
||||
if (fp == NULL) {
|
||||
char str[128];
|
||||
sprintf(str,"Cannot open file %s",file);
|
||||
error->one(FLERR,str);
|
||||
}
|
||||
|
||||
// loop until section found with matching keyword
|
||||
|
||||
while (1) {
|
||||
if (fgets(line,MAXLINE,fp) == NULL)
|
||||
error->one(FLERR,"Did not find keyword in table file");
|
||||
if (strspn(line," \t\n\r") == strlen(line)) continue; // blank line
|
||||
if (line[0] == '#') continue; // comment
|
||||
char *word = strtok(line," \t\n\r");
|
||||
if (strcmp(word,keyword) == 0) break; // matching keyword
|
||||
fgets(line,MAXLINE,fp); // no match, skip section
|
||||
param_extract(tb,line);
|
||||
fgets(line,MAXLINE,fp);
|
||||
for (int i = 0; i < tb->ninput; i++) fgets(line,MAXLINE,fp);
|
||||
}
|
||||
|
||||
// read args on 2nd line of section
|
||||
// allocate table arrays for file values
|
||||
|
||||
fgets(line,MAXLINE,fp);
|
||||
param_extract(tb,line);
|
||||
memory->create(tb->rfile,tb->ninput,"pair:rfile");
|
||||
memory->create(tb->efile,tb->ninput,"pair:efile");
|
||||
memory->create(tb->ffile,tb->ninput,"pair:ffile");
|
||||
|
||||
// setup bitmap parameters for table to read in
|
||||
|
||||
tb->ntablebits = 0;
|
||||
int masklo,maskhi,nmask,nshiftbits;
|
||||
if (tb->rflag == BMP) {
|
||||
while (1 << tb->ntablebits < tb->ninput) tb->ntablebits++;
|
||||
if (1 << tb->ntablebits != tb->ninput)
|
||||
error->one(FLERR,"Bitmapped table is incorrect length in table file");
|
||||
init_bitmap(tb->rlo,tb->rhi,tb->ntablebits,masklo,maskhi,nmask,nshiftbits);
|
||||
}
|
||||
|
||||
// read r,e,f table values from file
|
||||
// if rflag set, compute r
|
||||
// if rflag not set, use r from file
|
||||
|
||||
int itmp;
|
||||
double rtmp;
|
||||
union_int_float_t rsq_lookup;
|
||||
|
||||
fgets(line,MAXLINE,fp);
|
||||
for (int i = 0; i < tb->ninput; i++) {
|
||||
fgets(line,MAXLINE,fp);
|
||||
sscanf(line,"%d %lg %lg %lg",&itmp,&rtmp,&tb->efile[i],&tb->ffile[i]);
|
||||
|
||||
if (tb->rflag == RLINEAR)
|
||||
rtmp = tb->rlo + (tb->rhi - tb->rlo)*i/(tb->ninput-1);
|
||||
else if (tb->rflag == RSQ) {
|
||||
rtmp = tb->rlo*tb->rlo +
|
||||
(tb->rhi*tb->rhi - tb->rlo*tb->rlo)*i/(tb->ninput-1);
|
||||
rtmp = sqrt(rtmp);
|
||||
} else if (tb->rflag == BMP) {
|
||||
rsq_lookup.i = i << nshiftbits;
|
||||
rsq_lookup.i |= masklo;
|
||||
if (rsq_lookup.f < tb->rlo*tb->rlo) {
|
||||
rsq_lookup.i = i << nshiftbits;
|
||||
rsq_lookup.i |= maskhi;
|
||||
}
|
||||
rtmp = sqrtf(rsq_lookup.f);
|
||||
}
|
||||
|
||||
tb->rfile[i] = rtmp;
|
||||
}
|
||||
|
||||
// close file
|
||||
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
broadcast read-in table info from proc 0 to other procs
|
||||
this function communicates these values in Table:
|
||||
ninput,rfile,efile,ffile,rflag,rlo,rhi,fpflag,fplo,fphi
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairTableRX::bcast_table(Table *tb)
|
||||
{
|
||||
MPI_Bcast(&tb->ninput,1,MPI_INT,0,world);
|
||||
|
||||
int me;
|
||||
MPI_Comm_rank(world,&me);
|
||||
if (me > 0) {
|
||||
memory->create(tb->rfile,tb->ninput,"pair:rfile");
|
||||
memory->create(tb->efile,tb->ninput,"pair:efile");
|
||||
memory->create(tb->ffile,tb->ninput,"pair:ffile");
|
||||
}
|
||||
|
||||
MPI_Bcast(tb->rfile,tb->ninput,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(tb->efile,tb->ninput,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(tb->ffile,tb->ninput,MPI_DOUBLE,0,world);
|
||||
|
||||
MPI_Bcast(&tb->rflag,1,MPI_INT,0,world);
|
||||
if (tb->rflag) {
|
||||
MPI_Bcast(&tb->rlo,1,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&tb->rhi,1,MPI_DOUBLE,0,world);
|
||||
}
|
||||
MPI_Bcast(&tb->fpflag,1,MPI_INT,0,world);
|
||||
if (tb->fpflag) {
|
||||
MPI_Bcast(&tb->fplo,1,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&tb->fphi,1,MPI_DOUBLE,0,world);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
build spline representation of e,f over entire range of read-in table
|
||||
this function sets these values in Table: e2file,f2file
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairTableRX::spline_table(Table *tb)
|
||||
{
|
||||
memory->create(tb->e2file,tb->ninput,"pair:e2file");
|
||||
memory->create(tb->f2file,tb->ninput,"pair:f2file");
|
||||
|
||||
double ep0 = - tb->ffile[0];
|
||||
double epn = - tb->ffile[tb->ninput-1];
|
||||
spline(tb->rfile,tb->efile,tb->ninput,ep0,epn,tb->e2file);
|
||||
|
||||
if (tb->fpflag == 0) {
|
||||
tb->fplo = (tb->ffile[1] - tb->ffile[0]) / (tb->rfile[1] - tb->rfile[0]);
|
||||
tb->fphi = (tb->ffile[tb->ninput-1] - tb->ffile[tb->ninput-2]) /
|
||||
(tb->rfile[tb->ninput-1] - tb->rfile[tb->ninput-2]);
|
||||
}
|
||||
|
||||
double fp0 = tb->fplo;
|
||||
double fpn = tb->fphi;
|
||||
spline(tb->rfile,tb->ffile,tb->ninput,fp0,fpn,tb->f2file);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
extract attributes from parameter line in table section
|
||||
format of line: N value R/RSQ/BITMAP lo hi FP fplo fphi
|
||||
N is required, other params are optional
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairTableRX::param_extract(Table *tb, char *line)
|
||||
{
|
||||
tb->ninput = 0;
|
||||
tb->rflag = NONE;
|
||||
tb->fpflag = 0;
|
||||
|
||||
char *word = strtok(line," \t\n\r\f");
|
||||
while (word) {
|
||||
if (strcmp(word,"N") == 0) {
|
||||
word = strtok(NULL," \t\n\r\f");
|
||||
tb->ninput = atoi(word);
|
||||
} else if (strcmp(word,"R") == 0 || strcmp(word,"RSQ") == 0 ||
|
||||
strcmp(word,"BITMAP") == 0) {
|
||||
if (strcmp(word,"R") == 0) tb->rflag = RLINEAR;
|
||||
else if (strcmp(word,"RSQ") == 0) tb->rflag = RSQ;
|
||||
else if (strcmp(word,"BITMAP") == 0) tb->rflag = BMP;
|
||||
word = strtok(NULL," \t\n\r\f");
|
||||
tb->rlo = atof(word);
|
||||
word = strtok(NULL," \t\n\r\f");
|
||||
tb->rhi = atof(word);
|
||||
} else if (strcmp(word,"FP") == 0) {
|
||||
tb->fpflag = 1;
|
||||
word = strtok(NULL," \t\n\r\f");
|
||||
tb->fplo = atof(word);
|
||||
word = strtok(NULL," \t\n\r\f");
|
||||
tb->fphi = atof(word);
|
||||
} else {
|
||||
printf("WORD: %s\n",word);
|
||||
error->one(FLERR,"Invalid keyword in pair table parameters");
|
||||
}
|
||||
word = strtok(NULL," \t\n\r\f");
|
||||
}
|
||||
|
||||
if (tb->ninput == 0) error->one(FLERR,"Pair table parameters did not set N");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
   compute r,e,f vectors from splined values

   builds the run-time arrays of tb for the active tabstyle
   (LOOKUP, LINEAR, SPLINE or BITMAP) from the file arrays
   rfile,efile,ffile and their spline coefficients e2file,f2file

   reads from tb: cut,rflag,rlo,match,fpflag,fplo,fphi,ninput
   sets in tb: innersq,delta,invdelta for every style, deltasq6 for
   SPLINE, nmask,nshiftbits for BITMAP, plus the arrays each style
   allocates below (rsq,e,f,de,df,e2,f2,drsq as applicable)
------------------------------------------------------------------------- */
|
||||
|
||||
void PairTableRX::compute_table(Table *tb)
|
||||
{
|
||||
int tlm1 = tablength-1;
|
||||
|
||||
// inner = inner table bound
|
||||
// cut = outer table bound
|
||||
// delta = table spacing in rsq for N-1 bins
|
||||
|
||||
double inner;
|
||||
if (tb->rflag) inner = tb->rlo;
|
||||
else inner = tb->rfile[0];
|
||||
tb->innersq = double(inner)*double(inner);
|
||||
tb->delta = double(tb->cut*tb->cut - double(tb->innersq)) / double(tlm1);
|
||||
tb->invdelta = 1.0/double(tb->delta);
|
||||
|
||||
// direct lookup tables
|
||||
// N-1 evenly spaced bins in rsq from inner to cut
|
||||
// e,f = value at midpt of bin
|
||||
// e,f are N-1 in length since store 1 value at bin midpt
|
||||
// f is converted to f/r when stored in f[i]
|
||||
// e,f are never a match to read-in values, always computed via spline interp
|
||||
|
||||
if (tabstyle == LOOKUP) {
|
||||
memory->create(tb->e,tlm1,"pair:e");
|
||||
memory->create(tb->f,tlm1,"pair:f");
|
||||
|
||||
double r,rsq;
|
||||
for (int i = 0; i < tlm1; i++) {
|
||||
rsq = tb->innersq + (i+0.5)*tb->delta;
|
||||
r = sqrt(rsq);
|
||||
tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r);
|
||||
tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r;
|
||||
}
|
||||
}
|
||||
|
||||
// linear tables
|
||||
// N-1 evenly spaced bins in rsq from inner to cut
|
||||
// rsq,e,f = value at lower edge of bin
|
||||
// de,df values = delta from lower edge to upper edge of bin
|
||||
// rsq,e,f are N in length so de,df arrays can compute difference
|
||||
// f is converted to f/r when stored in f[i]
|
||||
// e,f can match read-in values, else compute via spline interp
|
||||
|
||||
if (tabstyle == LINEAR) {
|
||||
memory->create(tb->rsq,tablength,"pair:rsq");
|
||||
memory->create(tb->e,tablength,"pair:e");
|
||||
memory->create(tb->f,tablength,"pair:f");
|
||||
memory->create(tb->de,tlm1,"pair:de");
|
||||
memory->create(tb->df,tlm1,"pair:df");
|
||||
|
||||
double r,rsq;
|
||||
for (int i = 0; i < tablength; i++) {
|
||||
rsq = tb->innersq + i*tb->delta;
|
||||
r = sqrt(rsq);
|
||||
tb->rsq[i] = rsq;
|
||||
if (tb->match) {
|
||||
tb->e[i] = tb->efile[i];
|
||||
tb->f[i] = tb->ffile[i]/r;
|
||||
} else {
|
||||
tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r);
|
||||
tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < tlm1; i++) {
|
||||
tb->de[i] = tb->e[i+1] - tb->e[i];
|
||||
tb->df[i] = tb->f[i+1] - tb->f[i];
|
||||
}
|
||||
}
|
||||
|
||||
// cubic spline tables
|
||||
// N-1 evenly spaced bins in rsq from inner to cut
|
||||
// rsq,e,f = value at lower edge of bin
|
||||
// e2,f2 = spline coefficient for each bin
|
||||
// rsq,e,f,e2,f2 are N in length so have N-1 spline bins
|
||||
// f is converted to f/r after e is splined
|
||||
// e,f can match read-in values, else compute via spline interp
|
||||
|
||||
if (tabstyle == SPLINE) {
|
||||
memory->create(tb->rsq,tablength,"pair:rsq");
|
||||
memory->create(tb->e,tablength,"pair:e");
|
||||
memory->create(tb->f,tablength,"pair:f");
|
||||
memory->create(tb->e2,tablength,"pair:e2");
|
||||
memory->create(tb->f2,tablength,"pair:f2");
|
||||
|
||||
tb->deltasq6 = tb->delta*tb->delta / 6.0;
|
||||
|
||||
double r,rsq;
|
||||
for (int i = 0; i < tablength; i++) {
|
||||
rsq = tb->innersq + i*tb->delta;
|
||||
r = sqrt(rsq);
|
||||
tb->rsq[i] = rsq;
|
||||
if (tb->match) {
|
||||
tb->e[i] = tb->efile[i];
|
||||
tb->f[i] = tb->ffile[i]/r;
|
||||
} else {
|
||||
tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r);
|
||||
tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r);
|
||||
}
|
||||
}
|
||||
|
||||
// ep0,epn = dh/dg at inner and at cut
|
||||
// h(r) = e(r) and g(r) = r^2
|
||||
// dh/dg = (de/dr) / 2r = -f/2r
|
||||
|
||||
double ep0 = - tb->f[0] / (2.0 * sqrt(tb->innersq));
|
||||
double epn = - tb->f[tlm1] / (2.0 * tb->cut);
|
||||
spline(tb->rsq,tb->e,tablength,ep0,epn,tb->e2);
|
||||
|
||||
// fp0,fpn = dh/dg at inner and at cut
|
||||
// h(r) = f(r)/r and g(r) = r^2
|
||||
// dh/dg = (1/r df/dr - f/r^2) / 2r
|
||||
// dh/dg in secant approx = (f(r2)/r2 - f(r1)/r1) / (g(r2) - g(r1))
|
||||
|
||||
double fp0,fpn;
|
||||
// the secant points sit 0.1*delta inside the table from each end
double secant_factor = 0.1;
|
||||
if (tb->fpflag) fp0 = (tb->fplo/sqrt(tb->innersq) - tb->f[0]/tb->innersq) /
|
||||
(2.0 * sqrt(tb->innersq));
|
||||
else {
|
||||
double rsq1 = tb->innersq;
|
||||
double rsq2 = rsq1 + secant_factor*tb->delta;
|
||||
fp0 = (splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,sqrt(rsq2)) /
|
||||
sqrt(rsq2) - tb->f[0] / sqrt(rsq1)) / (secant_factor*tb->delta);
|
||||
}
|
||||
|
||||
if (tb->fpflag && tb->cut == tb->rfile[tb->ninput-1]) fpn =
|
||||
(tb->fphi/tb->cut - tb->f[tlm1]/(tb->cut*tb->cut)) / (2.0 * tb->cut);
|
||||
else {
|
||||
double rsq2 = tb->cut * tb->cut;
|
||||
double rsq1 = rsq2 - secant_factor*tb->delta;
|
||||
fpn = (tb->f[tlm1] / sqrt(rsq2) -
|
||||
splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,sqrt(rsq1)) /
|
||||
sqrt(rsq1)) / (secant_factor*tb->delta);
|
||||
}
|
||||
|
||||
// convert f to f/r only now: the end slopes above were built from f[i]
// as stored by the fill loop
for (int i = 0; i < tablength; i++) tb->f[i] /= sqrt(tb->rsq[i]);
|
||||
spline(tb->rsq,tb->f,tablength,fp0,fpn,tb->f2);
|
||||
}
|
||||
|
||||
// bitmapped linear tables
|
||||
// 2^N bins from inner to cut, spaced in bitmapped manner
|
||||
// f is converted to f/r when stored in f[i]
|
||||
// e,f can match read-in values, else compute via spline interp
|
||||
|
||||
if (tabstyle == BITMAP) {
|
||||
double r;
|
||||
union_int_float_t rsq_lookup;
|
||||
int masklo,maskhi;
|
||||
|
||||
// linear lookup tables of length ntable = 2^n
|
||||
// stored value = value at lower edge of bin
|
||||
|
||||
init_bitmap(inner,tb->cut,tablength,masklo,maskhi,tb->nmask,tb->nshiftbits);
|
||||
// for this style tablength is used as the exponent n, not as a length
int ntable = 1 << tablength;
|
||||
int ntablem1 = ntable - 1;
|
||||
|
||||
memory->create(tb->rsq,ntable,"pair:rsq");
|
||||
memory->create(tb->e,ntable,"pair:e");
|
||||
memory->create(tb->f,ntable,"pair:f");
|
||||
memory->create(tb->de,ntable,"pair:de");
|
||||
memory->create(tb->df,ntable,"pair:df");
|
||||
memory->create(tb->drsq,ntable,"pair:drsq");
|
||||
|
||||
// running minimum of rsq over all bins, seeded with bin 0 combined
// with maskhi
union_int_float_t minrsq_lookup;
|
||||
minrsq_lookup.i = 0 << tb->nshiftbits;
|
||||
minrsq_lookup.i |= maskhi;
|
||||
|
||||
for (int i = 0; i < ntable; i++) {
|
||||
rsq_lookup.i = i << tb->nshiftbits;
|
||||
rsq_lookup.i |= masklo;
|
||||
if (rsq_lookup.f < tb->innersq) {
|
||||
rsq_lookup.i = i << tb->nshiftbits;
|
||||
rsq_lookup.i |= maskhi;
|
||||
}
|
||||
r = sqrtf(rsq_lookup.f);
|
||||
tb->rsq[i] = rsq_lookup.f;
|
||||
if (tb->match) {
|
||||
tb->e[i] = tb->efile[i];
|
||||
tb->f[i] = tb->ffile[i]/r;
|
||||
} else {
|
||||
tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r);
|
||||
tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r;
|
||||
}
|
||||
minrsq_lookup.f = MIN(minrsq_lookup.f,rsq_lookup.f);
|
||||
}
|
||||
|
||||
// innersq is replaced by the smallest rsq actually stored in the table
tb->innersq = minrsq_lookup.f;
|
||||
|
||||
for (int i = 0; i < ntablem1; i++) {
|
||||
tb->de[i] = tb->e[i+1] - tb->e[i];
|
||||
tb->df[i] = tb->f[i+1] - tb->f[i];
|
||||
tb->drsq[i] = 1.0/(tb->rsq[i+1] - tb->rsq[i]);
|
||||
}
|
||||
|
||||
// get the delta values for the last table entries
|
||||
// tables are connected periodically between 0 and ntablem1
|
||||
|
||||
tb->de[ntablem1] = tb->e[0] - tb->e[ntablem1];
|
||||
tb->df[ntablem1] = tb->f[0] - tb->f[ntablem1];
|
||||
tb->drsq[ntablem1] = 1.0/(tb->rsq[0] - tb->rsq[ntablem1]);
|
||||
|
||||
// get the correct delta values at itablemax
|
||||
// smallest r is in bin itablemin
|
||||
// largest r is in bin itablemax, which is itablemin-1,
|
||||
// or ntablem1 if itablemin=0
|
||||
|
||||
// deltas at itablemax only needed if corresponding rsq < cut*cut
|
||||
// if so, compute deltas between rsq and cut*cut
|
||||
// if tb->match, data at cut*cut is unavailable, so we'll take
|
||||
// deltas at itablemax-1 as a good approximation
|
||||
|
||||
double e_tmp,f_tmp;
|
||||
int itablemin = minrsq_lookup.i & tb->nmask;
|
||||
itablemin >>= tb->nshiftbits;
|
||||
int itablemax = itablemin - 1;
|
||||
if (itablemin == 0) itablemax = ntablem1;
|
||||
int itablemaxm1 = itablemax - 1;
|
||||
if (itablemax == 0) itablemaxm1 = ntablem1;
|
||||
rsq_lookup.i = itablemax << tb->nshiftbits;
|
||||
rsq_lookup.i |= maskhi;
|
||||
if (rsq_lookup.f < tb->cut*tb->cut) {
|
||||
if (tb->match) {
|
||||
tb->de[itablemax] = tb->de[itablemaxm1];
|
||||
tb->df[itablemax] = tb->df[itablemaxm1];
|
||||
tb->drsq[itablemax] = tb->drsq[itablemaxm1];
|
||||
} else {
|
||||
rsq_lookup.f = tb->cut*tb->cut;
|
||||
r = sqrtf(rsq_lookup.f);
|
||||
e_tmp = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r);
|
||||
f_tmp = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r;
|
||||
tb->de[itablemax] = e_tmp - tb->e[itablemax];
|
||||
tb->df[itablemax] = f_tmp - tb->f[itablemax];
|
||||
tb->drsq[itablemax] = 1.0/(rsq_lookup.f - tb->rsq[itablemax]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
set all ptrs in a table to NULL, so can be freed safely
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairTableRX::null_table(Table *tb)
|
||||
{
|
||||
tb->rfile = tb->efile = tb->ffile = NULL;
|
||||
tb->e2file = tb->f2file = NULL;
|
||||
tb->rsq = tb->drsq = tb->e = tb->de = NULL;
|
||||
tb->f = tb->df = tb->e2 = tb->f2 = NULL;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
free all arrays in a table
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairTableRX::free_table(Table *tb)
|
||||
{
|
||||
memory->destroy(tb->rfile);
|
||||
memory->destroy(tb->efile);
|
||||
memory->destroy(tb->ffile);
|
||||
memory->destroy(tb->e2file);
|
||||
memory->destroy(tb->f2file);
|
||||
|
||||
memory->destroy(tb->rsq);
|
||||
memory->destroy(tb->drsq);
|
||||
memory->destroy(tb->e);
|
||||
memory->destroy(tb->de);
|
||||
memory->destroy(tb->f);
|
||||
memory->destroy(tb->df);
|
||||
memory->destroy(tb->e2);
|
||||
memory->destroy(tb->f2);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
spline and splint routines modified from Numerical Recipes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairTableRX::spline(double *x, double *y, int n,
|
||||
double yp1, double ypn, double *y2)
|
||||
{
|
||||
int i,k;
|
||||
double p,qn,sig,un;
|
||||
double *u = new double[n];
|
||||
|
||||
if (yp1 > 0.99e30) y2[0] = u[0] = 0.0;
|
||||
else {
|
||||
y2[0] = -0.5;
|
||||
u[0] = (3.0/(x[1]-x[0])) * ((y[1]-y[0]) / (x[1]-x[0]) - yp1);
|
||||
}
|
||||
for (i = 1; i < n-1; i++) {
|
||||
sig = (x[i]-x[i-1]) / (x[i+1]-x[i-1]);
|
||||
p = sig*y2[i-1] + 2.0;
|
||||
y2[i] = (sig-1.0) / p;
|
||||
u[i] = (y[i+1]-y[i]) / (x[i+1]-x[i]) - (y[i]-y[i-1]) / (x[i]-x[i-1]);
|
||||
u[i] = (6.0*u[i] / (x[i+1]-x[i-1]) - sig*u[i-1]) / p;
|
||||
}
|
||||
if (ypn > 0.99e30) qn = un = 0.0;
|
||||
else {
|
||||
qn = 0.5;
|
||||
un = (3.0/(x[n-1]-x[n-2])) * (ypn - (y[n-1]-y[n-2]) / (x[n-1]-x[n-2]));
|
||||
}
|
||||
y2[n-1] = (un-qn*u[n-2]) / (qn*y2[n-2] + 1.0);
|
||||
for (k = n-2; k >= 0; k--) y2[k] = y2[k]*y2[k+1] + u[k];
|
||||
|
||||
delete [] u;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
double PairTableRX::splint(double *xa, double *ya, double *y2a, int n, double x)
|
||||
{
|
||||
int klo,khi,k;
|
||||
double h,b,a,y;
|
||||
|
||||
klo = 0;
|
||||
khi = n-1;
|
||||
while (khi-klo > 1) {
|
||||
k = (khi+klo) >> 1;
|
||||
if (xa[k] > x) khi = k;
|
||||
else klo = k;
|
||||
}
|
||||
h = xa[khi]-xa[klo];
|
||||
a = (xa[khi]-x) / h;
|
||||
b = (x-xa[klo]) / h;
|
||||
y = a*ya[klo] + b*ya[khi] +
|
||||
((a*a*a-a)*y2a[klo] + (b*b*b-b)*y2a[khi]) * (h*h)/6.0;
|
||||
return y;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
proc 0 writes to restart file
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairTableRX::write_restart(FILE *fp)
|
||||
{
|
||||
write_restart_settings(fp);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
proc 0 reads from restart file, bcasts
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairTableRX::read_restart(FILE *fp)
|
||||
{
|
||||
read_restart_settings(fp);
|
||||
allocate();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
proc 0 writes to restart file
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairTableRX::write_restart_settings(FILE *fp)
|
||||
{
|
||||
fwrite(&tabstyle,sizeof(int),1,fp);
|
||||
fwrite(&tablength,sizeof(int),1,fp);
|
||||
fwrite(&ewaldflag,sizeof(int),1,fp);
|
||||
fwrite(&pppmflag,sizeof(int),1,fp);
|
||||
fwrite(&msmflag,sizeof(int),1,fp);
|
||||
fwrite(&dispersionflag,sizeof(int),1,fp);
|
||||
fwrite(&tip4pflag,sizeof(int),1,fp);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
proc 0 reads from restart file, bcasts
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairTableRX::read_restart_settings(FILE *fp)
|
||||
{
|
||||
if (comm->me == 0) {
|
||||
fread(&tabstyle,sizeof(int),1,fp);
|
||||
fread(&tablength,sizeof(int),1,fp);
|
||||
fread(&ewaldflag,sizeof(int),1,fp);
|
||||
fread(&pppmflag,sizeof(int),1,fp);
|
||||
fread(&msmflag,sizeof(int),1,fp);
|
||||
fread(&dispersionflag,sizeof(int),1,fp);
|
||||
fread(&tip4pflag,sizeof(int),1,fp);
|
||||
}
|
||||
MPI_Bcast(&tabstyle,1,MPI_INT,0,world);
|
||||
MPI_Bcast(&tablength,1,MPI_INT,0,world);
|
||||
MPI_Bcast(&ewaldflag,1,MPI_INT,0,world);
|
||||
MPI_Bcast(&pppmflag,1,MPI_INT,0,world);
|
||||
MPI_Bcast(&msmflag,1,MPI_INT,0,world);
|
||||
MPI_Bcast(&dispersionflag,1,MPI_INT,0,world);
|
||||
MPI_Bcast(&tip4pflag,1,MPI_INT,0,world);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
double PairTableRX::single(int i, int j, int itype, int jtype, double rsq,
|
||||
|
@ -1129,26 +507,6 @@ double PairTableRX::single(int i, int j, int itype, int jtype, double rsq,
|
|||
return factor_lj*phi;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
return the Coulomb cutoff for tabled potentials
|
||||
called by KSpace solvers which require that all pairwise cutoffs be the same
|
||||
loop over all tables not just those indexed by tabindex[i][j] since
|
||||
no way to know which tables are active since pair::init() not yet called
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void *PairTableRX::extract(const char *str, int &dim)
|
||||
{
|
||||
if (strcmp(str,"cut_coul") != 0) return NULL;
|
||||
if (ntables == 0) error->all(FLERR,"All pair coeffs are not set");
|
||||
|
||||
double cut_coul = tables[0].cut;
|
||||
for (int m = 1; m < ntables; m++)
|
||||
if (tables[m].cut != cut_coul)
|
||||
error->all(FLERR,"Pair table cutoffs must all be equal to use with KSpace");
|
||||
dim = 0;
|
||||
return &tables[0].cut;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairTableRX::getMixingWeights(int id, double &mixWtSite1old, double &mixWtSite2old, double &mixWtSite1, double &mixWtSite2)
|
||||
|
|
|
@ -20,11 +20,11 @@ PairStyle(table/rx,PairTableRX)
|
|||
#ifndef LMP_PAIR_TABLE_RX_H
|
||||
#define LMP_PAIR_TABLE_RX_H
|
||||
|
||||
#include "pair.h"
|
||||
#include "pair_table.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairTableRX : public Pair {
|
||||
class PairTableRX : public PairTable {
|
||||
public:
|
||||
PairTableRX(class LAMMPS *);
|
||||
virtual ~PairTableRX();
|
||||
|
@ -32,42 +32,9 @@ class PairTableRX : public Pair {
|
|||
virtual void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
double init_one(int, int);
|
||||
void write_restart(FILE *);
|
||||
void read_restart(FILE *);
|
||||
void write_restart_settings(FILE *);
|
||||
void read_restart_settings(FILE *);
|
||||
double single(int, int, int, int, double, double, double, double &);
|
||||
void *extract(const char *, int &);
|
||||
virtual double single(int, int, int, int, double, double, double, double &);
|
||||
|
||||
protected:
|
||||
enum{LOOKUP,LINEAR,SPLINE,BITMAP};
|
||||
|
||||
int tabstyle,tablength;
|
||||
// One tabulated potential: the raw section read from the table file plus
// the arrays derived from it for run-time evaluation.
struct Table {
|
||||
// ninput = value of the N keyword, rflag = NONE/RLINEAR/RSQ/BMP,
// fpflag = 1 if the FP keyword was given (all set by param_extract());
// match = compute_table() copies file values directly when set;
// ntablebits = set by read_table() for bitmapped input
int ninput,rflag,fpflag,match,ntablebits;
|
||||
// bitmap shift and mask, filled by init_bitmap() in compute_table()
int nshiftbits,nmask;
|
||||
// rlo,rhi = values after R/RSQ/BITMAP; fplo,fphi = values after FP, or
// estimated in spline_table(); cut = cutoff returned by init_one()
double rlo,rhi,fplo,fphi,cut;
|
||||
// r, energy and force values of the file section, ninput entries each
double *rfile,*efile,*ffile;
|
||||
// spline coefficients of efile and ffile, built by spline_table()
double *e2file,*f2file;
|
||||
// set by compute_table(): inner bound squared, rsq bin spacing, its
// inverse, and delta*delta/6 (SPLINE style only)
double innersq,delta,invdelta,deltasq6;
|
||||
// computed tables; which ones are allocated depends on tabstyle
double *rsq,*drsq,*e,*de,*f,*df,*e2,*f2;
|
||||
};
|
||||
int ntables;
|
||||
Table *tables;
|
||||
|
||||
int **tabindex;
|
||||
|
||||
void allocate();
|
||||
void read_table(Table *, char *, char *);
|
||||
void param_extract(Table *, char *);
|
||||
void bcast_table(Table *);
|
||||
void spline_table(Table *);
|
||||
void compute_table(Table *);
|
||||
void null_table(Table *);
|
||||
void free_table(Table *);
|
||||
void spline(double *, double *, int, double, double, double *);
|
||||
double splint(double *, double *, double *, int, double);
|
||||
|
||||
int nspecies;
|
||||
char *site1, *site2;
|
||||
|
|
|
@ -103,7 +103,6 @@ Atom::Atom(LAMMPS *lmp) : Pointers(lmp)
|
|||
uCond = uMech = uChem = uCG = uCGnew = NULL;
|
||||
duChem = NULL;
|
||||
dpdTheta = NULL;
|
||||
ssaAIR = NULL;
|
||||
|
||||
// USER-MESO
|
||||
|
||||
|
@ -305,7 +304,6 @@ Atom::~Atom()
|
|||
memory->destroy(uCG);
|
||||
memory->destroy(uCGnew);
|
||||
memory->destroy(duChem);
|
||||
memory->destroy(ssaAIR);
|
||||
|
||||
memory->destroy(cc);
|
||||
memory->destroy(cc_flux);
|
||||
|
@ -346,10 +344,12 @@ Atom::~Atom()
|
|||
delete [] iname[i];
|
||||
memory->destroy(ivector[i]);
|
||||
}
|
||||
if (dvector != NULL) {
|
||||
for (int i = 0; i < ndvector; i++) {
|
||||
delete [] dname[i];
|
||||
memory->destroy(dvector[i]);
|
||||
}
|
||||
}
|
||||
|
||||
memory->sfree(iname);
|
||||
memory->sfree(dname);
|
||||
|
|
|
@ -93,7 +93,6 @@ class Atom : protected Pointers {
|
|||
double *duChem;
|
||||
double *dpdTheta;
|
||||
int nspecies_dpd;
|
||||
int *ssaAIR; // Shardlow Splitting Algorithm Active Interaction Region number
|
||||
|
||||
// USER-MESO package
|
||||
|
||||
|
@ -262,8 +261,8 @@ class Atom : protected Pointers {
|
|||
void update_callback(int);
|
||||
|
||||
int find_custom(const char *, int &);
|
||||
int add_custom(const char *, int);
|
||||
void remove_custom(int, int);
|
||||
virtual int add_custom(const char *, int);
|
||||
virtual void remove_custom(int, int);
|
||||
|
||||
virtual void sync_modify(ExecutionSpace, unsigned int, unsigned int) {}
|
||||
|
||||
|
|
|
@ -42,6 +42,18 @@
|
|||
#define ENERGY_MASK 0x00010000
|
||||
#define VIRIAL_MASK 0x00020000
|
||||
|
||||
// DPD
// NOTE(review): UCOND_MASK has the same value (0x00100000) as RADIUS_MASK in
// the granular group that follows - confirm the two are never combined in
// one mask word.
|
||||
|
||||
#define DPDRHO_MASK 0x00040000
|
||||
#define DPDTHETA_MASK 0x00080000
|
||||
#define UCOND_MASK 0x00100000
|
||||
#define UMECH_MASK 0x00200000
|
||||
#define UCHEM_MASK 0x00400000
|
||||
#define UCG_MASK 0x00800000
|
||||
#define UCGNEW_MASK 0x01000000
|
||||
#define DUCHEM_MASK 0x02000000
|
||||
#define DVECTOR_MASK 0x04000000
|
||||
|
||||
// granular
|
||||
|
||||
#define RADIUS_MASK 0x00100000
|
||||
|
|
|
@ -134,7 +134,6 @@ FixPropertyAtom::FixPropertyAtom(LAMMPS *lmp, int narg, char **arg) :
|
|||
// register with Atom class
|
||||
|
||||
nmax_old = 0;
|
||||
grow_arrays(atom->nmax);
|
||||
atom->add_callback(0);
|
||||
atom->add_callback(1);
|
||||
if (border) atom->add_callback(2);
|
||||
|
@ -190,6 +189,8 @@ int FixPropertyAtom::setmask()
|
|||
|
||||
void FixPropertyAtom::init()
|
||||
{
|
||||
grow_arrays(atom->nmax);
|
||||
|
||||
// error if atom style has changed since fix was defined
|
||||
// don't allow this b/c user could change to style that defines molecule,q
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@ namespace LAMMPS_NS {
|
|||
class FixPropertyAtom : public Fix {
|
||||
public:
|
||||
FixPropertyAtom(class LAMMPS *, int, char **);
|
||||
~FixPropertyAtom();
|
||||
virtual ~FixPropertyAtom();
|
||||
int setmask();
|
||||
void init();
|
||||
|
||||
|
@ -38,7 +38,7 @@ class FixPropertyAtom : public Fix {
|
|||
void write_data_section_keyword(int, FILE *);
|
||||
void write_data_section(int, FILE *, int, double **, int);
|
||||
|
||||
void grow_arrays(int);
|
||||
virtual void grow_arrays(int);
|
||||
void copy_arrays(int, int, int);
|
||||
int pack_border(int, int *, double *);
|
||||
int unpack_border(int, int, double *);
|
||||
|
@ -50,7 +50,7 @@ class FixPropertyAtom : public Fix {
|
|||
int maxsize_restart();
|
||||
double memory_usage();
|
||||
|
||||
private:
|
||||
protected:
|
||||
int nvalue,border;
|
||||
int molecule_flag,q_flag,rmass_flag;
|
||||
int *style,*index;
|
||||
|
|
|
@ -201,6 +201,8 @@ FixWall::FixWall(LAMMPS *lmp, int narg, char **arg) :
|
|||
|
||||
FixWall::~FixWall()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
for (int m = 0; m < nwall; m++) {
|
||||
delete [] xstr[m];
|
||||
delete [] estr[m];
|
||||
|
|
|
@ -28,9 +28,9 @@ class FixWallLJ93 : public FixWall {
|
|||
public:
|
||||
FixWallLJ93(class LAMMPS *, int, char **);
|
||||
void precompute(int);
|
||||
void wall_particle(int, int, double);
|
||||
virtual void wall_particle(int, int, double);
|
||||
|
||||
private:
|
||||
protected:
|
||||
double coeff1[6],coeff2[6],coeff3[6],coeff4[6],offset[6];
|
||||
};
|
||||
|
||||
|
|
|
@ -79,7 +79,8 @@ NeighList::NeighList(LAMMPS *lmp) : Pointers(lmp)
|
|||
|
||||
// USER-DPD package
|
||||
|
||||
ndxAIR_ssa = NULL;
|
||||
for (int i = 0; i < 8; i++) AIRct_ssa[i] = 0;
|
||||
np = NULL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -99,10 +100,6 @@ NeighList::~NeighList()
|
|||
|
||||
delete [] iskip;
|
||||
memory->destroy(ijskip);
|
||||
|
||||
if (ssa) {
|
||||
memory->sfree(ndxAIR_ssa);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -203,14 +200,16 @@ void NeighList::grow(int nlocal, int nall)
|
|||
if (listmiddle) listmiddle->grow(nlocal,nall);
|
||||
|
||||
// skip if data structs are already big enough
|
||||
|
||||
if (ghost) {
|
||||
if (ssa) {
|
||||
if ((nlocal * 3) + nall <= maxatom) return;
|
||||
} else if (ghost) {
|
||||
if (nall <= maxatom) return;
|
||||
} else {
|
||||
if (nlocal <= maxatom) return;
|
||||
}
|
||||
|
||||
maxatom = atom->nmax;
|
||||
if (ssa) maxatom = (nlocal * 3) + nall;
|
||||
else maxatom = atom->nmax;
|
||||
|
||||
memory->destroy(ilist);
|
||||
memory->destroy(numneigh);
|
||||
|
@ -224,12 +223,6 @@ void NeighList::grow(int nlocal, int nall)
|
|||
firstdouble = (double **) memory->smalloc(maxatom*sizeof(double *),
|
||||
"neighlist:firstdouble");
|
||||
}
|
||||
|
||||
if (ssa) {
|
||||
if (ndxAIR_ssa) memory->sfree(ndxAIR_ssa);
|
||||
ndxAIR_ssa = (uint16_t (*)[8]) memory->smalloc(sizeof(uint16_t)*8*maxatom,
|
||||
"neighlist:ndxAIR_ssa");
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -306,7 +299,5 @@ bigint NeighList::memory_usage()
|
|||
}
|
||||
}
|
||||
|
||||
if (ndxAIR_ssa) bytes += sizeof(uint16_t) * 8 * maxatom;
|
||||
|
||||
return bytes;
|
||||
}
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue