forked from lijiext/lammps
Removing unnecessary fences (DeviceType::fence() calls in the Kokkos classes)
This commit is contained in:
parent
286d4f2743
commit
ed50bd2254
|
@ -111,7 +111,6 @@ void AngleCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagAngleCharmmCompute<0,0> >(0,nanglelist),*this);
|
||||
}
|
||||
}
|
||||
DeviceType::fence();
|
||||
|
||||
if (eflag_global) energy += ev.evdwl;
|
||||
if (vflag_global) {
|
||||
|
|
|
@ -110,7 +110,6 @@ void BondClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagBondClass2Compute<0,0> >(0,nbondlist),*this);
|
||||
}
|
||||
}
|
||||
//DeviceType::fence();
|
||||
|
||||
if (eflag_global) energy += ev.evdwl;
|
||||
if (vflag_global) {
|
||||
|
|
|
@ -125,7 +125,6 @@ void BondFENEKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagBondFENECompute<0,0> >(0,nbondlist),*this);
|
||||
}
|
||||
}
|
||||
DeviceType::fence();
|
||||
|
||||
k_warning_flag.template modify<DeviceType>();
|
||||
k_warning_flag.template sync<LMPHostType>();
|
||||
|
|
|
@ -111,7 +111,6 @@ void BondHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagBondHarmonicCompute<0,0> >(0,nbondlist),*this);
|
||||
}
|
||||
}
|
||||
//DeviceType::fence();
|
||||
|
||||
if (eflag_global) energy += ev.evdwl;
|
||||
if (vflag_global) {
|
||||
|
|
|
@ -63,7 +63,6 @@ double ComputeTempKokkos<DeviceType>::compute_scalar()
|
|||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempScalar<1> >(0,nlocal),*this,t_kk);
|
||||
else
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempScalar<0> >(0,nlocal),*this,t_kk);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
t = t_kk.t0; // could make this more efficient
|
||||
|
@ -118,7 +117,6 @@ void ComputeTempKokkos<DeviceType>::compute_vector()
|
|||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempVector<1> >(0,nlocal),*this,t_kk);
|
||||
else
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempVector<0> >(0,nlocal),*this,t_kk);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
t[0] = t_kk.t0;
|
||||
|
|
|
@ -132,7 +132,6 @@ void DihedralCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagDihedralCharmmCompute<0,0> >(0,ndihedrallist),*this);
|
||||
}
|
||||
}
|
||||
DeviceType::fence();
|
||||
|
||||
// error check
|
||||
|
||||
|
|
|
@ -159,7 +159,6 @@ void DihedralClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagDihedralClass2Compute<0,0> >(0,ndihedrallist),*this);
|
||||
}
|
||||
}
|
||||
DeviceType::fence();
|
||||
|
||||
// error check
|
||||
|
||||
|
|
|
@ -121,7 +121,6 @@ void DihedralOPLSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagDihedralOPLSCompute<0,0> >(0,ndihedrallist),*this);
|
||||
}
|
||||
}
|
||||
DeviceType::fence();
|
||||
|
||||
// error check
|
||||
|
||||
|
|
|
@ -506,7 +506,6 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
|
|||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
|
||||
DeviceType::fence();
|
||||
|
||||
if(tbiasflag == BIAS){
|
||||
atomKK->sync(temperature->execution_space,temperature->datamask_read);
|
||||
|
@ -531,7 +530,6 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
|
|||
// set total force zero in parallel on the device
|
||||
FixLangevinKokkosZeroForceFunctor<DeviceType> zero_functor(this);
|
||||
Kokkos::parallel_for(nlocal,zero_functor);
|
||||
DeviceType::fence();
|
||||
}
|
||||
// f is modified by both post_force and zero_force functors
|
||||
atomKK->modified(execution_space,datamask_modify);
|
||||
|
@ -726,7 +724,6 @@ double FixLangevinKokkos<DeviceType>::compute_scalar()
|
|||
k_flangevin.template sync<DeviceType>();
|
||||
FixLangevinKokkosTallyEnergyFunctor<DeviceType> scalar_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,scalar_functor,energy_onestep);
|
||||
DeviceType::fence();
|
||||
energy = 0.5*energy_onestep*update->dt;
|
||||
}
|
||||
|
||||
|
@ -770,7 +767,6 @@ void FixLangevinKokkos<DeviceType>::end_of_step()
|
|||
k_flangevin.template sync<DeviceType>();
|
||||
FixLangevinKokkosTallyEnergyFunctor<DeviceType> tally_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,tally_functor,energy_onestep);
|
||||
DeviceType::fence();
|
||||
|
||||
energy += energy_onestep*update->dt;
|
||||
}
|
||||
|
|
|
@ -495,7 +495,6 @@ void FixNHKokkos<DeviceType>::nh_v_press()
|
|||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nh_v_press<1> >(0,nlocal),*this);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nh_v_press<0> >(0,nlocal),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
atomKK->modified(execution_space,V_MASK);
|
||||
|
@ -550,7 +549,6 @@ void FixNHKokkos<DeviceType>::nve_v()
|
|||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nve_v<1> >(0,nlocal),*this);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nve_v<0> >(0,nlocal),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
|
@ -595,7 +593,6 @@ void FixNHKokkos<DeviceType>::nve_x()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nve_x>(0,nlocal),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
|
@ -631,7 +628,6 @@ void FixNHKokkos<DeviceType>::nh_v_temp()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nh_v_temp>(0,nlocal),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
atomKK->modified(execution_space,V_MASK);
|
||||
|
|
|
@ -76,7 +76,6 @@ void FixNVEKokkos<DeviceType>::initial_integrate(int vflag)
|
|||
FixNVEKokkosInitialIntegrateFunctor<DeviceType,0> functor(this);
|
||||
Kokkos::parallel_for(nlocal,functor);
|
||||
}
|
||||
DeviceType::fence();
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
|
@ -133,7 +132,6 @@ void FixNVEKokkos<DeviceType>::final_integrate()
|
|||
FixNVEKokkosFinalIntegrateFunctor<DeviceType,0> functor(this);
|
||||
Kokkos::parallel_for(nlocal,functor);
|
||||
}
|
||||
DeviceType::fence();
|
||||
|
||||
// debug
|
||||
//atomKK->sync(Host,datamask_read);
|
||||
|
|
|
@ -234,12 +234,10 @@ void FixQEqReaxKokkos<DeviceType>::pre_force(int vflag)
|
|||
// compute_H
|
||||
FixQEqReaxKokkosComputeHFunctor<DeviceType> computeH_functor(this);
|
||||
Kokkos::parallel_scan(inum,computeH_functor);
|
||||
DeviceType::fence();
|
||||
|
||||
// init_matvec
|
||||
FixQEqReaxKokkosMatVecFunctor<DeviceType> matvec_functor(this);
|
||||
Kokkos::parallel_for(inum,matvec_functor);
|
||||
DeviceType::fence();
|
||||
|
||||
// comm->forward_comm_fix(this); //Dist_vector( s );
|
||||
pack_flag = 2;
|
||||
|
@ -259,15 +257,12 @@ void FixQEqReaxKokkos<DeviceType>::pre_force(int vflag)
|
|||
|
||||
// 1st cg solve over b_s, s
|
||||
cg_solve1();
|
||||
DeviceType::fence();
|
||||
|
||||
// 2nd cg solve over b_t, t
|
||||
cg_solve2();
|
||||
DeviceType::fence();
|
||||
|
||||
// calculate_Q();
|
||||
calculate_q();
|
||||
DeviceType::fence();
|
||||
|
||||
copymode = 0;
|
||||
|
||||
|
@ -354,7 +349,6 @@ void FixQEqReaxKokkos<DeviceType>::allocate_array()
|
|||
const int ignum = atom->nlocal + atom->nghost;
|
||||
FixQEqReaxKokkosZeroFunctor<DeviceType> zero_functor(this);
|
||||
Kokkos::parallel_for(ignum,zero_functor);
|
||||
DeviceType::fence();
|
||||
|
||||
}
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -499,10 +493,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
|
|||
// sparse_matvec( &H, x, q );
|
||||
FixQEqReaxKokkosSparse12Functor<DeviceType> sparse12_functor(this);
|
||||
Kokkos::parallel_for(inum,sparse12_functor);
|
||||
DeviceType::fence();
|
||||
if (neighflag != FULL) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this);
|
||||
DeviceType::fence();
|
||||
if (neighflag == HALF) {
|
||||
FixQEqReaxKokkosSparse13Functor<DeviceType,HALF> sparse13_functor(this);
|
||||
Kokkos::parallel_for(inum,sparse13_functor);
|
||||
|
@ -513,7 +505,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
|
|||
} else {
|
||||
Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec1> (inum, teamsize), *this);
|
||||
}
|
||||
DeviceType::fence();
|
||||
|
||||
if (neighflag != FULL) {
|
||||
k_o.template modify<DeviceType>();
|
||||
|
@ -529,21 +520,17 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
|
|||
F_FLOAT my_norm = 0.0;
|
||||
FixQEqReaxKokkosNorm1Functor<DeviceType> norm1_functor(this);
|
||||
Kokkos::parallel_reduce(inum,norm1_functor,my_norm);
|
||||
DeviceType::fence();
|
||||
F_FLOAT norm_sqr = 0.0;
|
||||
MPI_Allreduce( &my_norm, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
|
||||
b_norm = sqrt(norm_sqr);
|
||||
DeviceType::fence();
|
||||
|
||||
// sig_new = parallel_dot( r, d, nn);
|
||||
F_FLOAT my_dot = 0.0;
|
||||
FixQEqReaxKokkosDot1Functor<DeviceType> dot1_functor(this);
|
||||
Kokkos::parallel_reduce(inum,dot1_functor,my_dot);
|
||||
DeviceType::fence();
|
||||
F_FLOAT dot_sqr = 0.0;
|
||||
MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
|
||||
F_FLOAT sig_new = dot_sqr;
|
||||
DeviceType::fence();
|
||||
|
||||
int loop;
|
||||
const int loopmax = 200;
|
||||
|
@ -560,10 +547,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
|
|||
// sparse_matvec( &H, d, q );
|
||||
FixQEqReaxKokkosSparse22Functor<DeviceType> sparse22_functor(this);
|
||||
Kokkos::parallel_for(inum,sparse22_functor);
|
||||
DeviceType::fence();
|
||||
if (neighflag != FULL) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this);
|
||||
DeviceType::fence();
|
||||
if (neighflag == HALF) {
|
||||
FixQEqReaxKokkosSparse23Functor<DeviceType,HALF> sparse23_functor(this);
|
||||
Kokkos::parallel_for(inum,sparse23_functor);
|
||||
|
@ -574,7 +559,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
|
|||
} else {
|
||||
Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec2> (inum, teamsize), *this);
|
||||
}
|
||||
DeviceType::fence();
|
||||
|
||||
|
||||
if (neighflag != FULL) {
|
||||
|
@ -589,7 +573,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
|
|||
my_dot = dot_sqr = 0.0;
|
||||
FixQEqReaxKokkosDot2Functor<DeviceType> dot2_functor(this);
|
||||
Kokkos::parallel_reduce(inum,dot2_functor,my_dot);
|
||||
DeviceType::fence();
|
||||
MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
|
||||
tmp = dot_sqr;
|
||||
|
||||
|
@ -602,12 +585,10 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
|
|||
my_dot = dot_sqr = 0.0;
|
||||
FixQEqReaxKokkosPrecon1Functor<DeviceType> precon1_functor(this);
|
||||
Kokkos::parallel_for(inum,precon1_functor);
|
||||
DeviceType::fence();
|
||||
// preconditioning: p[j] = r[j] * Hdia_inv[j];
|
||||
// sig_new = parallel_dot( r, p, nn);
|
||||
FixQEqReaxKokkosPreconFunctor<DeviceType> precon_functor(this);
|
||||
Kokkos::parallel_reduce(inum,precon_functor,my_dot);
|
||||
DeviceType::fence();
|
||||
MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
|
||||
sig_new = dot_sqr;
|
||||
|
||||
|
@ -616,7 +597,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
|
|||
// vector_sum( d, 1., p, beta, d, nn );
|
||||
FixQEqReaxKokkosVecSum2Functor<DeviceType> vecsum2_functor(this);
|
||||
Kokkos::parallel_for(inum,vecsum2_functor);
|
||||
DeviceType::fence();
|
||||
}
|
||||
|
||||
if (loop >= loopmax && comm->me == 0) {
|
||||
|
@ -644,10 +624,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
|
|||
// sparse_matvec( &H, x, q );
|
||||
FixQEqReaxKokkosSparse32Functor<DeviceType> sparse32_functor(this);
|
||||
Kokkos::parallel_for(inum,sparse32_functor);
|
||||
DeviceType::fence();
|
||||
if (neighflag != FULL) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this);
|
||||
DeviceType::fence();
|
||||
if (neighflag == HALF) {
|
||||
FixQEqReaxKokkosSparse33Functor<DeviceType,HALF> sparse33_functor(this);
|
||||
Kokkos::parallel_for(inum,sparse33_functor);
|
||||
|
@ -658,7 +636,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
|
|||
} else {
|
||||
Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec3> (inum, teamsize), *this);
|
||||
}
|
||||
DeviceType::fence();
|
||||
|
||||
if (neighflag != FULL) {
|
||||
k_o.template modify<DeviceType>();
|
||||
|
@ -674,21 +651,17 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
|
|||
F_FLOAT my_norm = 0.0;
|
||||
FixQEqReaxKokkosNorm2Functor<DeviceType> norm2_functor(this);
|
||||
Kokkos::parallel_reduce(inum,norm2_functor,my_norm);
|
||||
DeviceType::fence();
|
||||
F_FLOAT norm_sqr = 0.0;
|
||||
MPI_Allreduce( &my_norm, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
|
||||
b_norm = sqrt(norm_sqr);
|
||||
DeviceType::fence();
|
||||
|
||||
// sig_new = parallel_dot( r, d, nn);
|
||||
F_FLOAT my_dot = 0.0;
|
||||
FixQEqReaxKokkosDot1Functor<DeviceType> dot1_functor(this);
|
||||
Kokkos::parallel_reduce(inum,dot1_functor,my_dot);
|
||||
DeviceType::fence();
|
||||
F_FLOAT dot_sqr = 0.0;
|
||||
MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
|
||||
F_FLOAT sig_new = dot_sqr;
|
||||
DeviceType::fence();
|
||||
|
||||
int loop;
|
||||
const int loopmax = 200;
|
||||
|
@ -705,10 +678,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
|
|||
// sparse_matvec( &H, d, q );
|
||||
FixQEqReaxKokkosSparse22Functor<DeviceType> sparse22_functor(this);
|
||||
Kokkos::parallel_for(inum,sparse22_functor);
|
||||
DeviceType::fence();
|
||||
if (neighflag != FULL) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this);
|
||||
DeviceType::fence();
|
||||
if (neighflag == HALF) {
|
||||
FixQEqReaxKokkosSparse23Functor<DeviceType,HALF> sparse23_functor(this);
|
||||
Kokkos::parallel_for(inum,sparse23_functor);
|
||||
|
@ -719,7 +690,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
|
|||
} else {
|
||||
Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec2> (inum, teamsize), *this);
|
||||
}
|
||||
DeviceType::fence();
|
||||
|
||||
if (neighflag != FULL) {
|
||||
k_o.template modify<DeviceType>();
|
||||
|
@ -733,10 +703,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
|
|||
my_dot = dot_sqr = 0.0;
|
||||
FixQEqReaxKokkosDot2Functor<DeviceType> dot2_functor(this);
|
||||
Kokkos::parallel_reduce(inum,dot2_functor,my_dot);
|
||||
DeviceType::fence();
|
||||
MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
|
||||
tmp = dot_sqr;
|
||||
DeviceType::fence();
|
||||
|
||||
alpha = sig_new / tmp;
|
||||
|
||||
|
@ -747,12 +715,10 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
|
|||
my_dot = dot_sqr = 0.0;
|
||||
FixQEqReaxKokkosPrecon2Functor<DeviceType> precon2_functor(this);
|
||||
Kokkos::parallel_for(inum,precon2_functor);
|
||||
DeviceType::fence();
|
||||
// preconditioning: p[j] = r[j] * Hdia_inv[j];
|
||||
// sig_new = parallel_dot( r, p, nn);
|
||||
FixQEqReaxKokkosPreconFunctor<DeviceType> precon_functor(this);
|
||||
Kokkos::parallel_reduce(inum,precon_functor,my_dot);
|
||||
DeviceType::fence();
|
||||
MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
|
||||
sig_new = dot_sqr;
|
||||
|
||||
|
@ -761,7 +727,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
|
|||
// vector_sum( d, 1., p, beta, d, nn );
|
||||
FixQEqReaxKokkosVecSum2Functor<DeviceType> vecsum2_functor(this);
|
||||
Kokkos::parallel_for(inum,vecsum2_functor);
|
||||
DeviceType::fence();
|
||||
}
|
||||
|
||||
if (loop >= loopmax && comm->me == 0) {
|
||||
|
@ -786,7 +751,6 @@ void FixQEqReaxKokkos<DeviceType>::calculate_q()
|
|||
sum = sum_all = 0.0;
|
||||
FixQEqReaxKokkosVecAcc1Functor<DeviceType> vecacc1_functor(this);
|
||||
Kokkos::parallel_reduce(inum,vecacc1_functor,sum);
|
||||
DeviceType::fence();
|
||||
MPI_Allreduce(&sum, &sum_all, 1, MPI_DOUBLE, MPI_SUM, world );
|
||||
const F_FLOAT s_sum = sum_all;
|
||||
|
||||
|
@ -794,7 +758,6 @@ void FixQEqReaxKokkos<DeviceType>::calculate_q()
|
|||
sum = sum_all = 0.0;
|
||||
FixQEqReaxKokkosVecAcc2Functor<DeviceType> vecacc2_functor(this);
|
||||
Kokkos::parallel_reduce(inum,vecacc2_functor,sum);
|
||||
DeviceType::fence();
|
||||
MPI_Allreduce(&sum, &sum_all, 1, MPI_DOUBLE, MPI_SUM, world );
|
||||
const F_FLOAT t_sum = sum_all;
|
||||
|
||||
|
@ -804,7 +767,6 @@ void FixQEqReaxKokkos<DeviceType>::calculate_q()
|
|||
// q[i] = s[i] - u * t[i];
|
||||
FixQEqReaxKokkosCalculateQFunctor<DeviceType> calculateQ_functor(this);
|
||||
Kokkos::parallel_for(inum,calculateQ_functor);
|
||||
DeviceType::fence();
|
||||
|
||||
pack_flag = 4;
|
||||
//comm->forward_comm_fix( this ); //Dist_vector( atom->q );
|
||||
|
|
|
@ -108,7 +108,6 @@ void FixSetForceKokkos<DeviceType>::post_force(int vflag)
|
|||
if (varflag == CONSTANT) {
|
||||
copymode = 1;
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixSetForceConstant>(0,nlocal),*this,foriginal_kk);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
// variable force, wrap with clear/add
|
||||
|
@ -138,7 +137,6 @@ void FixSetForceKokkos<DeviceType>::post_force(int vflag)
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixSetForceNonConstant>(0,nlocal),*this,foriginal_kk);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -79,7 +79,6 @@ void FixWallReflectKokkos<DeviceType>::post_integrate()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixWallReflectPostIntegrate>(0,nlocal),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -140,7 +140,6 @@ void ImproperClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagImproperClass2Compute<0,0> >(0,nimproperlist),*this);
|
||||
}
|
||||
}
|
||||
DeviceType::fence();
|
||||
if (eflag_global) energy += ev.evdwl;
|
||||
|
||||
// error check
|
||||
|
@ -165,7 +164,6 @@ void ImproperClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagImproperClass2AngleAngle<0,0> >(0,nimproperlist),*this);
|
||||
}
|
||||
}
|
||||
DeviceType::fence();
|
||||
if (eflag_global) energy += ev.evdwl;
|
||||
|
||||
if (vflag_global) {
|
||||
|
|
|
@ -128,7 +128,6 @@ void ImproperHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagImproperHarmonicCompute<0,0> >(0,nimproperlist),*this);
|
||||
}
|
||||
}
|
||||
//DeviceType::fence();
|
||||
|
||||
// error check
|
||||
|
||||
|
|
|
@ -409,7 +409,6 @@ int PairEAMAlloyKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_i
|
|||
iswap = iswap_in;
|
||||
v_buf = buf.view<DeviceType>();
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMAlloyPackForwardComm>(0,n),*this);
|
||||
DeviceType::fence();
|
||||
return n;
|
||||
}
|
||||
|
||||
|
@ -428,7 +427,6 @@ void PairEAMAlloyKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first
|
|||
first = first_in;
|
||||
v_buf = buf.view<DeviceType>();
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMAlloyUnpackForwardComm>(0,n),*this);
|
||||
DeviceType::fence();
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
|
|
|
@ -133,7 +133,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSInitialize>(0,nall),*this);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSInitialize>(0,nlocal),*this);
|
||||
DeviceType::fence();
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
|
@ -156,7 +155,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelA<HALFTHREAD,0> >(0,inum),*this);
|
||||
}
|
||||
}
|
||||
DeviceType::fence();
|
||||
|
||||
// communicate and sum densities (on the host)
|
||||
|
||||
|
@ -174,7 +172,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelB<1> >(0,inum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelB<0> >(0,inum),*this);
|
||||
DeviceType::fence();
|
||||
|
||||
} else if (neighflag == FULL) {
|
||||
|
||||
|
@ -184,7 +181,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelAB<1> >(0,inum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelAB<0> >(0,inum),*this);
|
||||
DeviceType::fence();
|
||||
}
|
||||
|
||||
if (eflag) {
|
||||
|
@ -239,7 +235,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
}
|
||||
}
|
||||
}
|
||||
DeviceType::fence();
|
||||
|
||||
if (eflag_global) eng_vdwl += ev.evdwl;
|
||||
if (vflag_global) {
|
||||
|
@ -414,7 +409,6 @@ int PairEAMFSKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_
|
|||
iswap = iswap_in;
|
||||
v_buf = buf.view<DeviceType>();
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMFSPackForwardComm>(0,n),*this);
|
||||
DeviceType::fence();
|
||||
return n;
|
||||
}
|
||||
|
||||
|
@ -433,7 +427,6 @@ void PairEAMFSKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first_in
|
|||
first = first_in;
|
||||
v_buf = buf.view<DeviceType>();
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMFSUnpackForwardComm>(0,n),*this);
|
||||
DeviceType::fence();
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
|
|
|
@ -409,7 +409,6 @@ int PairEAMKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d
|
|||
iswap = iswap_in;
|
||||
v_buf = buf.view<DeviceType>();
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMPackForwardComm>(0,n),*this);
|
||||
DeviceType::fence();
|
||||
return n;
|
||||
}
|
||||
|
||||
|
@ -428,7 +427,6 @@ void PairEAMKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first_in,
|
|||
first = first_in;
|
||||
v_buf = buf.view<DeviceType>();
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMUnpackForwardComm>(0,n),*this);
|
||||
DeviceType::fence();
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
|
|
|
@ -731,7 +731,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputePolar<HALFTHREAD,0> >(0,inum),*this);
|
||||
}
|
||||
DeviceType::fence();
|
||||
ev_all += ev;
|
||||
pvector[13] = ev.ecoul;
|
||||
|
||||
|
@ -771,7 +770,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeLJCoulomb<FULL,0> >(0,inum),*this);
|
||||
}
|
||||
}
|
||||
DeviceType::fence();
|
||||
ev_all += ev;
|
||||
pvector[10] = ev.evdwl;
|
||||
pvector[11] = ev.ecoul;
|
||||
|
@ -800,7 +798,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
|
||||
// zero
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxZero>(0,nmax),*this);
|
||||
DeviceType::fence();
|
||||
|
||||
if (neighflag == HALF)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsHalf<HALF> >(0,ignum),*this);
|
||||
|
@ -808,7 +805,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsHalf_LessAtomics<HALFTHREAD> >(0,ignum),*this);
|
||||
else //(neighflag == FULL)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsFull>(0,ignum),*this);
|
||||
DeviceType::fence();
|
||||
|
||||
k_resize_bo.modify<DeviceType>();
|
||||
k_resize_bo.sync<LMPHostType>();
|
||||
|
@ -827,15 +823,11 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
// Bond order
|
||||
if (neighflag == HALF) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder1>(0,ignum),*this);
|
||||
DeviceType::fence();
|
||||
} else if (neighflag == HALFTHREAD) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder1_LessAtomics>(0,ignum),*this);
|
||||
DeviceType::fence();
|
||||
}
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder2>(0,ignum),*this);
|
||||
DeviceType::fence();
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder3>(0,ignum),*this);
|
||||
DeviceType::fence();
|
||||
|
||||
// Bond energy
|
||||
if (neighflag == HALF) {
|
||||
|
@ -843,7 +835,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALF,1> >(0,inum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALF,0> >(0,inum),*this);
|
||||
DeviceType::fence();
|
||||
ev_all += ev;
|
||||
pvector[0] = ev.evdwl;
|
||||
} else { //if (neighflag == HALFTHREAD) {
|
||||
|
@ -851,7 +842,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALFTHREAD,1> >(0,inum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALFTHREAD,0> >(0,inum),*this);
|
||||
DeviceType::fence();
|
||||
ev_all += ev;
|
||||
pvector[0] = ev.evdwl;
|
||||
}
|
||||
|
@ -859,21 +849,17 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
// Multi-body corrections
|
||||
if (neighflag == HALF) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti1<HALF,0> >(0,inum),*this);
|
||||
DeviceType::fence();
|
||||
if (evflag)
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALF,1> >(0,inum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALF,0> >(0,inum),*this);
|
||||
DeviceType::fence();
|
||||
ev_all += ev;
|
||||
} else { //if (neighflag == HALFTHREAD) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti1<HALFTHREAD,0> >(0,inum),*this);
|
||||
DeviceType::fence();
|
||||
if (evflag)
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALFTHREAD,1> >(0,inum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALFTHREAD,0> >(0,inum),*this);
|
||||
DeviceType::fence();
|
||||
ev_all += ev;
|
||||
}
|
||||
pvector[2] = ev.ereax[0];
|
||||
|
@ -887,14 +873,12 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALF,1> >(0,inum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALF,0> >(0,inum),*this);
|
||||
DeviceType::fence();
|
||||
ev_all += ev;
|
||||
} else { //if (neighflag == HALFTHREAD) {
|
||||
if (evflag)
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALFTHREAD,1> >(0,inum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALFTHREAD,0> >(0,inum),*this);
|
||||
DeviceType::fence();
|
||||
ev_all += ev;
|
||||
}
|
||||
pvector[4] = ev.ereax[3];
|
||||
|
@ -908,14 +892,12 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALF,1> >(0,inum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALF,0> >(0,inum),*this);
|
||||
DeviceType::fence();
|
||||
ev_all += ev;
|
||||
} else { //if (neighflag == HALFTHREAD) {
|
||||
if (evflag)
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALFTHREAD,1> >(0,inum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALFTHREAD,0> >(0,inum),*this);
|
||||
DeviceType::fence();
|
||||
ev_all += ev;
|
||||
}
|
||||
pvector[8] = ev.ereax[6];
|
||||
|
@ -929,14 +911,12 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALF,1> >(0,inum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALF,0> >(0,inum),*this);
|
||||
DeviceType::fence();
|
||||
ev_all += ev;
|
||||
} else { //if (neighflag == HALFTHREAD) {
|
||||
if (evflag)
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALFTHREAD,1> >(0,inum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALFTHREAD,0> >(0,inum),*this);
|
||||
DeviceType::fence();
|
||||
ev_all += ev;
|
||||
}
|
||||
}
|
||||
|
@ -946,22 +926,18 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
// Bond force
|
||||
if (neighflag == HALF) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxUpdateBond<HALF> >(0,ignum),*this);
|
||||
DeviceType::fence();
|
||||
if (evflag)
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALF,1> >(0,ignum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALF,0> >(0,ignum),*this);
|
||||
DeviceType::fence();
|
||||
ev_all += ev;
|
||||
pvector[0] += ev.evdwl;
|
||||
} else { //if (neighflag == HALFTHREAD) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxUpdateBond<HALFTHREAD> >(0,ignum),*this);
|
||||
DeviceType::fence();
|
||||
if (evflag)
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALFTHREAD,1> >(0,ignum),*this,ev);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALFTHREAD,0> >(0,ignum),*this);
|
||||
DeviceType::fence();
|
||||
ev_all += ev;
|
||||
pvector[0] += ev.evdwl;
|
||||
}
|
||||
|
@ -3945,11 +3921,9 @@ void PairReaxCKokkos<DeviceType>::ev_setup(int eflag, int vflag)
|
|||
if (vflag_global) for (i = 0; i < 6; i++) virial[i] = 0.0;
|
||||
if (eflag_atom) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxZeroEAtom>(0,maxeatom),*this);
|
||||
DeviceType::fence();
|
||||
}
|
||||
if (vflag_atom) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxZeroVAtom>(0,maxvatom),*this);
|
||||
DeviceType::fence();
|
||||
}
|
||||
|
||||
// if vflag_global = 2 and pair::compute() calls virial_fdotr_compute()
|
||||
|
@ -4002,7 +3976,6 @@ void PairReaxCKokkos<DeviceType>::FindBond(int &numbonds)
|
|||
{
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxFindBondZero>(0,nmax),*this);
|
||||
DeviceType::fence();
|
||||
|
||||
bo_cut_bond = control->bg_cut;
|
||||
|
||||
|
@ -4017,7 +3990,6 @@ void PairReaxCKokkos<DeviceType>::FindBond(int &numbonds)
|
|||
numbonds = 0;
|
||||
PairReaxCKokkosFindBondFunctor<DeviceType> find_bond_functor(this);
|
||||
Kokkos::parallel_reduce(inum,find_bond_functor,numbonds);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
|
@ -4076,7 +4048,6 @@ void PairReaxCKokkos<DeviceType>::PackBondBuffer(DAT::tdual_ffloat_1d k_buf, int
|
|||
nlocal = atomKK->nlocal;
|
||||
PairReaxCKokkosPackBondBufferFunctor<DeviceType> pack_bond_buffer_functor(this);
|
||||
Kokkos::parallel_scan(nlocal,pack_bond_buffer_functor);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
k_buf.modify<DeviceType>();
|
||||
|
@ -4135,11 +4106,9 @@ void PairReaxCKokkos<DeviceType>::FindBondSpecies()
|
|||
{
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxFindBondSpeciesZero>(0,nmax),*this);
|
||||
DeviceType::fence();
|
||||
|
||||
nlocal = atomKK->nlocal;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxFindBondSpecies>(0,nlocal),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
// NOTE: Could improve performance if a Kokkos version of ComputeSpecAtom is added
|
||||
|
|
|
@ -403,17 +403,14 @@ void PPPMKokkos<DeviceType>::setup()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_setup1>(nxlo_fft,nxhi_fft+1),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_setup2>(nylo_fft,nyhi_fft+1),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_setup3>(nzlo_fft,nzhi_fft+1),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
// merge three outer loops into one for better threading
|
||||
|
@ -425,7 +422,6 @@ void PPPMKokkos<DeviceType>::setup()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_setup4>(0,inum_fft),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
compute_gf_ik();
|
||||
|
@ -753,7 +749,6 @@ void PPPMKokkos<DeviceType>::compute(int eflag, int vflag)
|
|||
if (eflag_atom) {
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_self1>(0,nlocal),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
//for (i = nlocal; i < ntotal; i++) d_eatom[i] *= 0.5*qscale;
|
||||
}
|
||||
|
@ -761,7 +756,6 @@ void PPPMKokkos<DeviceType>::compute(int eflag, int vflag)
|
|||
if (vflag_atom) {
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_self2>(0,ntotal),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
}
|
||||
}
|
||||
|
@ -1415,7 +1409,6 @@ void PPPMKokkos<DeviceType>::compute_gf_ik()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_compute_gf_ik>(0,inum_fft),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
|
@ -1495,7 +1488,6 @@ void PPPMKokkos<DeviceType>::compute_gf_ik_triclinic()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_compute_gf_ik_triclinic>(nzlo_fft,nzhi_fft+1),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
|
@ -1588,7 +1580,6 @@ void PPPMKokkos<DeviceType>::particle_map()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_particle_map>(0,nlocal),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
k_flag.template modify<DeviceType>();
|
||||
|
@ -1641,7 +1632,6 @@ void PPPMKokkos<DeviceType>::make_rho()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_make_rho_zero>(0,inum_out),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
// loop over my charges, add their contribution to nearby grid points
|
||||
|
@ -1654,7 +1644,6 @@ void PPPMKokkos<DeviceType>::make_rho()
|
|||
#ifdef KOKKOS_HAVE_CUDA
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_make_rho_atomic>(0,nlocal),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
#else
|
||||
ix = nxhi_out-nxlo_out + 1;
|
||||
|
@ -1663,7 +1652,6 @@ void PPPMKokkos<DeviceType>::make_rho()
|
|||
copymode = 1;
|
||||
Kokkos::TeamPolicy<DeviceType, TagPPPM_make_rho> config(lmp->kokkos->num_threads,1);
|
||||
Kokkos::parallel_for(config,*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
#endif
|
||||
}
|
||||
|
@ -1794,7 +1782,6 @@ void PPPMKokkos<DeviceType>::brick2fft()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_brick2fft>(0,inum_inout),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
k_density_fft.template modify<DeviceType>();
|
||||
|
@ -1842,7 +1829,6 @@ void PPPMKokkos<DeviceType>::poisson_ik()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik1>(0,nfft),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
k_work1.template modify<DeviceType>();
|
||||
|
@ -1862,14 +1848,12 @@ void PPPMKokkos<DeviceType>::poisson_ik()
|
|||
if (vflag_global) {
|
||||
copymode = 1;
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik2>(0,nfft),*this,ev);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
for (j = 0; j < 6; j++) virial[j] += ev.v[j];
|
||||
energy += ev.ecoul;
|
||||
} else {
|
||||
copymode = 1;
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik3>(0,nfft),*this,ev);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
energy += ev.ecoul;
|
||||
}
|
||||
|
@ -1880,7 +1864,6 @@ void PPPMKokkos<DeviceType>::poisson_ik()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik4>(0,nfft),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
// extra FFTs for per-atomKK energy/virial
|
||||
|
@ -1914,7 +1897,6 @@ void PPPMKokkos<DeviceType>::poisson_ik()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik5>(0,inum_fft),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
k_work2.template modify<DeviceType>();
|
||||
|
@ -1926,7 +1908,6 @@ void PPPMKokkos<DeviceType>::poisson_ik()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik6>(0,inum_inout),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
|
||||
|
@ -1934,7 +1915,6 @@ void PPPMKokkos<DeviceType>::poisson_ik()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik7>(0,inum_fft),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
k_work2.template modify<DeviceType>();
|
||||
|
@ -1946,14 +1926,12 @@ void PPPMKokkos<DeviceType>::poisson_ik()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik8>(0,inum_inout),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
// z direction gradient
|
||||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik9>(0,inum_fft),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
k_work2.template modify<DeviceType>();
|
||||
|
@ -1965,7 +1943,6 @@ void PPPMKokkos<DeviceType>::poisson_ik()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik10>(0,inum_inout),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
}
|
||||
|
@ -2215,7 +2192,6 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
|
|||
if (eflag_atom) {
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom1>(0,nfft),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
k_work2.template modify<DeviceType>();
|
||||
|
@ -2227,7 +2203,6 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom2>(0,inum_inout),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
}
|
||||
|
@ -2238,7 +2213,6 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom3>(0,nfft),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
k_work2.template modify<DeviceType>();
|
||||
|
@ -2250,13 +2224,11 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom4>(0,inum_inout),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom5>(0,nfft),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
k_work2.template modify<DeviceType>();
|
||||
|
@ -2268,13 +2240,11 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom6>(0,inum_inout),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom7>(0,nfft),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
k_work2.template modify<DeviceType>();
|
||||
|
@ -2286,12 +2256,10 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom8>(0,inum_inout),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom9>(0,nfft),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
k_work2.template modify<DeviceType>();
|
||||
|
@ -2303,13 +2271,11 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom10>(0,inum_inout),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom11>(0,nfft),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
k_work2.template modify<DeviceType>();
|
||||
|
@ -2321,13 +2287,11 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom12>(0,inum_inout),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom13>(0,nfft),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
k_work2.template modify<DeviceType>();
|
||||
|
@ -2339,7 +2303,6 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom14>(0,inum_inout),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
}
|
||||
|
@ -2545,7 +2508,6 @@ void PPPMKokkos<DeviceType>::fieldforce_ik()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_fieldforce_ik>(0,nlocal),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
|
@ -2606,7 +2568,6 @@ void PPPMKokkos<DeviceType>::fieldforce_peratom()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_fieldforce_peratom>(0,nlocal),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
|
@ -2682,12 +2643,10 @@ void PPPMKokkos<DeviceType>::pack_forward_kokkos(int flag, Kokkos::DualView<FFT_
|
|||
if (flag == FORWARD_IK) {
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_pack_forward1>(0,nlist),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
} else if (flag == FORWARD_IK_PERATOM) {
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_pack_forward2>(0,nlist),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
}
|
||||
}
|
||||
|
@ -2740,12 +2699,10 @@ void PPPMKokkos<DeviceType>::unpack_forward_kokkos(int flag, Kokkos::DualView<FF
|
|||
if (flag == FORWARD_IK) {
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_unpack_forward1>(0,nlist),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
} else if (flag == FORWARD_IK_PERATOM) {
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_unpack_forward2>(0,nlist),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
}
|
||||
}
|
||||
|
@ -2798,7 +2755,6 @@ void PPPMKokkos<DeviceType>::pack_reverse_kokkos(int flag, Kokkos::DualView<FFT_
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_pack_reverse>(0,nlist),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
|
@ -2829,7 +2785,6 @@ void PPPMKokkos<DeviceType>::unpack_reverse_kokkos(int flag, Kokkos::DualView<FF
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_unpack_reverse>(0,nlist),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
|
@ -2989,7 +2944,6 @@ void PPPMKokkos<DeviceType>::slabcorr()
|
|||
double dipole = 0.0;
|
||||
copymode = 1;
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_slabcorr1>(0,nlocal),*this,dipole);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
// sum local contributions to get global dipole moment
|
||||
|
@ -3003,7 +2957,6 @@ void PPPMKokkos<DeviceType>::slabcorr()
|
|||
if (eflag_atom || fabs(qsum) > SMALL) {
|
||||
copymode = 1;
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_slabcorr2>(0,nlocal),*this,dipole_r2);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
// sum local contributions
|
||||
|
@ -3027,7 +2980,6 @@ void PPPMKokkos<DeviceType>::slabcorr()
|
|||
efact = qscale * MY_2PI/volume;
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_slabcorr3>(0,nlocal),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
|
@ -3037,7 +2989,6 @@ void PPPMKokkos<DeviceType>::slabcorr()
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_slabcorr4>(0,nlocal),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
|
@ -3081,7 +3032,6 @@ int PPPMKokkos<DeviceType>::timing_1d(int n, double &time1d)
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_timing_zero>(0,2*nfft_both),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
MPI_Barrier(world);
|
||||
|
@ -3119,7 +3069,6 @@ int PPPMKokkos<DeviceType>::timing_3d(int n, double &time3d)
|
|||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_timing_zero>(0,2*nfft_both),*this);
|
||||
DeviceType::fence();
|
||||
copymode = 0;
|
||||
|
||||
MPI_Barrier(world);
|
||||
|
|
Loading…
Reference in New Issue