Removing unnecessary fences

This commit is contained in:
Stan Moore 2017-06-05 13:54:13 -06:00
parent 286d4f2743
commit ed50bd2254
21 changed files with 0 additions and 156 deletions

View File

@ -111,7 +111,6 @@ void AngleCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagAngleCharmmCompute<0,0> >(0,nanglelist),*this);
}
}
DeviceType::fence();
if (eflag_global) energy += ev.evdwl;
if (vflag_global) {

View File

@ -110,7 +110,6 @@ void BondClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagBondClass2Compute<0,0> >(0,nbondlist),*this);
}
}
//DeviceType::fence();
if (eflag_global) energy += ev.evdwl;
if (vflag_global) {

View File

@ -125,7 +125,6 @@ void BondFENEKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagBondFENECompute<0,0> >(0,nbondlist),*this);
}
}
DeviceType::fence();
k_warning_flag.template modify<DeviceType>();
k_warning_flag.template sync<LMPHostType>();

View File

@ -111,7 +111,6 @@ void BondHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagBondHarmonicCompute<0,0> >(0,nbondlist),*this);
}
}
//DeviceType::fence();
if (eflag_global) energy += ev.evdwl;
if (vflag_global) {

View File

@ -63,7 +63,6 @@ double ComputeTempKokkos<DeviceType>::compute_scalar()
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempScalar<1> >(0,nlocal),*this,t_kk);
else
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempScalar<0> >(0,nlocal),*this,t_kk);
DeviceType::fence();
copymode = 0;
t = t_kk.t0; // could make this more efficient
@ -118,7 +117,6 @@ void ComputeTempKokkos<DeviceType>::compute_vector()
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempVector<1> >(0,nlocal),*this,t_kk);
else
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempVector<0> >(0,nlocal),*this,t_kk);
DeviceType::fence();
copymode = 0;
t[0] = t_kk.t0;

View File

@ -132,7 +132,6 @@ void DihedralCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagDihedralCharmmCompute<0,0> >(0,ndihedrallist),*this);
}
}
DeviceType::fence();
// error check

View File

@ -159,7 +159,6 @@ void DihedralClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagDihedralClass2Compute<0,0> >(0,ndihedrallist),*this);
}
}
DeviceType::fence();
// error check

View File

@ -121,7 +121,6 @@ void DihedralOPLSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagDihedralOPLSCompute<0,0> >(0,ndihedrallist),*this);
}
}
DeviceType::fence();
// error check

View File

@ -506,7 +506,6 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
Kokkos::parallel_for(nlocal,post_functor);
}
DeviceType::fence();
if(tbiasflag == BIAS){
atomKK->sync(temperature->execution_space,temperature->datamask_read);
@ -531,7 +530,6 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
// set total force zero in parallel on the device
FixLangevinKokkosZeroForceFunctor<DeviceType> zero_functor(this);
Kokkos::parallel_for(nlocal,zero_functor);
DeviceType::fence();
}
// f is modified by both post_force and zero_force functors
atomKK->modified(execution_space,datamask_modify);
@ -726,7 +724,6 @@ double FixLangevinKokkos<DeviceType>::compute_scalar()
k_flangevin.template sync<DeviceType>();
FixLangevinKokkosTallyEnergyFunctor<DeviceType> scalar_functor(this);
Kokkos::parallel_reduce(nlocal,scalar_functor,energy_onestep);
DeviceType::fence();
energy = 0.5*energy_onestep*update->dt;
}
@ -770,7 +767,6 @@ void FixLangevinKokkos<DeviceType>::end_of_step()
k_flangevin.template sync<DeviceType>();
FixLangevinKokkosTallyEnergyFunctor<DeviceType> tally_functor(this);
Kokkos::parallel_reduce(nlocal,tally_functor,energy_onestep);
DeviceType::fence();
energy += energy_onestep*update->dt;
}

View File

@ -495,7 +495,6 @@ void FixNHKokkos<DeviceType>::nh_v_press()
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nh_v_press<1> >(0,nlocal),*this);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nh_v_press<0> >(0,nlocal),*this);
DeviceType::fence();
copymode = 0;
atomKK->modified(execution_space,V_MASK);
@ -550,7 +549,6 @@ void FixNHKokkos<DeviceType>::nve_v()
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nve_v<1> >(0,nlocal),*this);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nve_v<0> >(0,nlocal),*this);
DeviceType::fence();
copymode = 0;
}
@ -595,7 +593,6 @@ void FixNHKokkos<DeviceType>::nve_x()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nve_x>(0,nlocal),*this);
DeviceType::fence();
copymode = 0;
}
@ -631,7 +628,6 @@ void FixNHKokkos<DeviceType>::nh_v_temp()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nh_v_temp>(0,nlocal),*this);
DeviceType::fence();
copymode = 0;
atomKK->modified(execution_space,V_MASK);

View File

@ -76,7 +76,6 @@ void FixNVEKokkos<DeviceType>::initial_integrate(int vflag)
FixNVEKokkosInitialIntegrateFunctor<DeviceType,0> functor(this);
Kokkos::parallel_for(nlocal,functor);
}
DeviceType::fence();
}
template<class DeviceType>
@ -133,7 +132,6 @@ void FixNVEKokkos<DeviceType>::final_integrate()
FixNVEKokkosFinalIntegrateFunctor<DeviceType,0> functor(this);
Kokkos::parallel_for(nlocal,functor);
}
DeviceType::fence();
// debug
//atomKK->sync(Host,datamask_read);

View File

@ -234,12 +234,10 @@ void FixQEqReaxKokkos<DeviceType>::pre_force(int vflag)
// compute_H
FixQEqReaxKokkosComputeHFunctor<DeviceType> computeH_functor(this);
Kokkos::parallel_scan(inum,computeH_functor);
DeviceType::fence();
// init_matvec
FixQEqReaxKokkosMatVecFunctor<DeviceType> matvec_functor(this);
Kokkos::parallel_for(inum,matvec_functor);
DeviceType::fence();
// comm->forward_comm_fix(this); //Dist_vector( s );
pack_flag = 2;
@ -259,15 +257,12 @@ void FixQEqReaxKokkos<DeviceType>::pre_force(int vflag)
// 1st cg solve over b_s, s
cg_solve1();
DeviceType::fence();
// 2nd cg solve over b_t, t
cg_solve2();
DeviceType::fence();
// calculate_Q();
calculate_q();
DeviceType::fence();
copymode = 0;
@ -354,7 +349,6 @@ void FixQEqReaxKokkos<DeviceType>::allocate_array()
const int ignum = atom->nlocal + atom->nghost;
FixQEqReaxKokkosZeroFunctor<DeviceType> zero_functor(this);
Kokkos::parallel_for(ignum,zero_functor);
DeviceType::fence();
}
/* ---------------------------------------------------------------------- */
@ -499,10 +493,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
// sparse_matvec( &H, x, q );
FixQEqReaxKokkosSparse12Functor<DeviceType> sparse12_functor(this);
Kokkos::parallel_for(inum,sparse12_functor);
DeviceType::fence();
if (neighflag != FULL) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this);
DeviceType::fence();
if (neighflag == HALF) {
FixQEqReaxKokkosSparse13Functor<DeviceType,HALF> sparse13_functor(this);
Kokkos::parallel_for(inum,sparse13_functor);
@ -513,7 +505,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
} else {
Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec1> (inum, teamsize), *this);
}
DeviceType::fence();
if (neighflag != FULL) {
k_o.template modify<DeviceType>();
@ -529,21 +520,17 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
F_FLOAT my_norm = 0.0;
FixQEqReaxKokkosNorm1Functor<DeviceType> norm1_functor(this);
Kokkos::parallel_reduce(inum,norm1_functor,my_norm);
DeviceType::fence();
F_FLOAT norm_sqr = 0.0;
MPI_Allreduce( &my_norm, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
b_norm = sqrt(norm_sqr);
DeviceType::fence();
// sig_new = parallel_dot( r, d, nn);
F_FLOAT my_dot = 0.0;
FixQEqReaxKokkosDot1Functor<DeviceType> dot1_functor(this);
Kokkos::parallel_reduce(inum,dot1_functor,my_dot);
DeviceType::fence();
F_FLOAT dot_sqr = 0.0;
MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
F_FLOAT sig_new = dot_sqr;
DeviceType::fence();
int loop;
const int loopmax = 200;
@ -560,10 +547,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
// sparse_matvec( &H, d, q );
FixQEqReaxKokkosSparse22Functor<DeviceType> sparse22_functor(this);
Kokkos::parallel_for(inum,sparse22_functor);
DeviceType::fence();
if (neighflag != FULL) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this);
DeviceType::fence();
if (neighflag == HALF) {
FixQEqReaxKokkosSparse23Functor<DeviceType,HALF> sparse23_functor(this);
Kokkos::parallel_for(inum,sparse23_functor);
@ -574,7 +559,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
} else {
Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec2> (inum, teamsize), *this);
}
DeviceType::fence();
if (neighflag != FULL) {
@ -589,7 +573,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
my_dot = dot_sqr = 0.0;
FixQEqReaxKokkosDot2Functor<DeviceType> dot2_functor(this);
Kokkos::parallel_reduce(inum,dot2_functor,my_dot);
DeviceType::fence();
MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
tmp = dot_sqr;
@ -602,12 +585,10 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
my_dot = dot_sqr = 0.0;
FixQEqReaxKokkosPrecon1Functor<DeviceType> precon1_functor(this);
Kokkos::parallel_for(inum,precon1_functor);
DeviceType::fence();
// preconditioning: p[j] = r[j] * Hdia_inv[j];
// sig_new = parallel_dot( r, p, nn);
FixQEqReaxKokkosPreconFunctor<DeviceType> precon_functor(this);
Kokkos::parallel_reduce(inum,precon_functor,my_dot);
DeviceType::fence();
MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
sig_new = dot_sqr;
@ -616,7 +597,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
// vector_sum( d, 1., p, beta, d, nn );
FixQEqReaxKokkosVecSum2Functor<DeviceType> vecsum2_functor(this);
Kokkos::parallel_for(inum,vecsum2_functor);
DeviceType::fence();
}
if (loop >= loopmax && comm->me == 0) {
@ -644,10 +624,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
// sparse_matvec( &H, x, q );
FixQEqReaxKokkosSparse32Functor<DeviceType> sparse32_functor(this);
Kokkos::parallel_for(inum,sparse32_functor);
DeviceType::fence();
if (neighflag != FULL) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this);
DeviceType::fence();
if (neighflag == HALF) {
FixQEqReaxKokkosSparse33Functor<DeviceType,HALF> sparse33_functor(this);
Kokkos::parallel_for(inum,sparse33_functor);
@ -658,7 +636,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
} else {
Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec3> (inum, teamsize), *this);
}
DeviceType::fence();
if (neighflag != FULL) {
k_o.template modify<DeviceType>();
@ -674,21 +651,17 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
F_FLOAT my_norm = 0.0;
FixQEqReaxKokkosNorm2Functor<DeviceType> norm2_functor(this);
Kokkos::parallel_reduce(inum,norm2_functor,my_norm);
DeviceType::fence();
F_FLOAT norm_sqr = 0.0;
MPI_Allreduce( &my_norm, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
b_norm = sqrt(norm_sqr);
DeviceType::fence();
// sig_new = parallel_dot( r, d, nn);
F_FLOAT my_dot = 0.0;
FixQEqReaxKokkosDot1Functor<DeviceType> dot1_functor(this);
Kokkos::parallel_reduce(inum,dot1_functor,my_dot);
DeviceType::fence();
F_FLOAT dot_sqr = 0.0;
MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
F_FLOAT sig_new = dot_sqr;
DeviceType::fence();
int loop;
const int loopmax = 200;
@ -705,10 +678,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
// sparse_matvec( &H, d, q );
FixQEqReaxKokkosSparse22Functor<DeviceType> sparse22_functor(this);
Kokkos::parallel_for(inum,sparse22_functor);
DeviceType::fence();
if (neighflag != FULL) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this);
DeviceType::fence();
if (neighflag == HALF) {
FixQEqReaxKokkosSparse23Functor<DeviceType,HALF> sparse23_functor(this);
Kokkos::parallel_for(inum,sparse23_functor);
@ -719,7 +690,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
} else {
Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec2> (inum, teamsize), *this);
}
DeviceType::fence();
if (neighflag != FULL) {
k_o.template modify<DeviceType>();
@ -733,10 +703,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
my_dot = dot_sqr = 0.0;
FixQEqReaxKokkosDot2Functor<DeviceType> dot2_functor(this);
Kokkos::parallel_reduce(inum,dot2_functor,my_dot);
DeviceType::fence();
MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
tmp = dot_sqr;
DeviceType::fence();
alpha = sig_new / tmp;
@ -747,12 +715,10 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
my_dot = dot_sqr = 0.0;
FixQEqReaxKokkosPrecon2Functor<DeviceType> precon2_functor(this);
Kokkos::parallel_for(inum,precon2_functor);
DeviceType::fence();
// preconditioning: p[j] = r[j] * Hdia_inv[j];
// sig_new = parallel_dot( r, p, nn);
FixQEqReaxKokkosPreconFunctor<DeviceType> precon_functor(this);
Kokkos::parallel_reduce(inum,precon_functor,my_dot);
DeviceType::fence();
MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
sig_new = dot_sqr;
@ -761,7 +727,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
// vector_sum( d, 1., p, beta, d, nn );
FixQEqReaxKokkosVecSum2Functor<DeviceType> vecsum2_functor(this);
Kokkos::parallel_for(inum,vecsum2_functor);
DeviceType::fence();
}
if (loop >= loopmax && comm->me == 0) {
@ -786,7 +751,6 @@ void FixQEqReaxKokkos<DeviceType>::calculate_q()
sum = sum_all = 0.0;
FixQEqReaxKokkosVecAcc1Functor<DeviceType> vecacc1_functor(this);
Kokkos::parallel_reduce(inum,vecacc1_functor,sum);
DeviceType::fence();
MPI_Allreduce(&sum, &sum_all, 1, MPI_DOUBLE, MPI_SUM, world );
const F_FLOAT s_sum = sum_all;
@ -794,7 +758,6 @@ void FixQEqReaxKokkos<DeviceType>::calculate_q()
sum = sum_all = 0.0;
FixQEqReaxKokkosVecAcc2Functor<DeviceType> vecacc2_functor(this);
Kokkos::parallel_reduce(inum,vecacc2_functor,sum);
DeviceType::fence();
MPI_Allreduce(&sum, &sum_all, 1, MPI_DOUBLE, MPI_SUM, world );
const F_FLOAT t_sum = sum_all;
@ -804,7 +767,6 @@ void FixQEqReaxKokkos<DeviceType>::calculate_q()
// q[i] = s[i] - u * t[i];
FixQEqReaxKokkosCalculateQFunctor<DeviceType> calculateQ_functor(this);
Kokkos::parallel_for(inum,calculateQ_functor);
DeviceType::fence();
pack_flag = 4;
//comm->forward_comm_fix( this ); //Dist_vector( atom->q );

View File

@ -108,7 +108,6 @@ void FixSetForceKokkos<DeviceType>::post_force(int vflag)
if (varflag == CONSTANT) {
copymode = 1;
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixSetForceConstant>(0,nlocal),*this,foriginal_kk);
DeviceType::fence();
copymode = 0;
// variable force, wrap with clear/add
@ -138,7 +137,6 @@ void FixSetForceKokkos<DeviceType>::post_force(int vflag)
copymode = 1;
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixSetForceNonConstant>(0,nlocal),*this,foriginal_kk);
DeviceType::fence();
copymode = 0;
}

View File

@ -79,7 +79,6 @@ void FixWallReflectKokkos<DeviceType>::post_integrate()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixWallReflectPostIntegrate>(0,nlocal),*this);
DeviceType::fence();
copymode = 0;
}

View File

@ -140,7 +140,6 @@ void ImproperClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagImproperClass2Compute<0,0> >(0,nimproperlist),*this);
}
}
DeviceType::fence();
if (eflag_global) energy += ev.evdwl;
// error check
@ -165,7 +164,6 @@ void ImproperClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagImproperClass2AngleAngle<0,0> >(0,nimproperlist),*this);
}
}
DeviceType::fence();
if (eflag_global) energy += ev.evdwl;
if (vflag_global) {

View File

@ -128,7 +128,6 @@ void ImproperHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagImproperHarmonicCompute<0,0> >(0,nimproperlist),*this);
}
}
//DeviceType::fence();
// error check

View File

@ -409,7 +409,6 @@ int PairEAMAlloyKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_i
iswap = iswap_in;
v_buf = buf.view<DeviceType>();
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMAlloyPackForwardComm>(0,n),*this);
DeviceType::fence();
return n;
}
@ -428,7 +427,6 @@ void PairEAMAlloyKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first
first = first_in;
v_buf = buf.view<DeviceType>();
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMAlloyUnpackForwardComm>(0,n),*this);
DeviceType::fence();
}
template<class DeviceType>

View File

@ -133,7 +133,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSInitialize>(0,nall),*this);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSInitialize>(0,nlocal),*this);
DeviceType::fence();
// loop over neighbors of my atoms
@ -156,7 +155,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelA<HALFTHREAD,0> >(0,inum),*this);
}
}
DeviceType::fence();
// communicate and sum densities (on the host)
@ -174,7 +172,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelB<1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelB<0> >(0,inum),*this);
DeviceType::fence();
} else if (neighflag == FULL) {
@ -184,7 +181,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelAB<1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelAB<0> >(0,inum),*this);
DeviceType::fence();
}
if (eflag) {
@ -239,7 +235,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
}
}
}
DeviceType::fence();
if (eflag_global) eng_vdwl += ev.evdwl;
if (vflag_global) {
@ -414,7 +409,6 @@ int PairEAMFSKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_
iswap = iswap_in;
v_buf = buf.view<DeviceType>();
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMFSPackForwardComm>(0,n),*this);
DeviceType::fence();
return n;
}
@ -433,7 +427,6 @@ void PairEAMFSKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first_in
first = first_in;
v_buf = buf.view<DeviceType>();
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMFSUnpackForwardComm>(0,n),*this);
DeviceType::fence();
}
template<class DeviceType>

View File

@ -409,7 +409,6 @@ int PairEAMKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d
iswap = iswap_in;
v_buf = buf.view<DeviceType>();
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMPackForwardComm>(0,n),*this);
DeviceType::fence();
return n;
}
@ -428,7 +427,6 @@ void PairEAMKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first_in,
first = first_in;
v_buf = buf.view<DeviceType>();
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMUnpackForwardComm>(0,n),*this);
DeviceType::fence();
}
template<class DeviceType>

View File

@ -731,7 +731,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputePolar<HALFTHREAD,0> >(0,inum),*this);
}
DeviceType::fence();
ev_all += ev;
pvector[13] = ev.ecoul;
@ -771,7 +770,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeLJCoulomb<FULL,0> >(0,inum),*this);
}
}
DeviceType::fence();
ev_all += ev;
pvector[10] = ev.evdwl;
pvector[11] = ev.ecoul;
@ -800,7 +798,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
// zero
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxZero>(0,nmax),*this);
DeviceType::fence();
if (neighflag == HALF)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsHalf<HALF> >(0,ignum),*this);
@ -808,7 +805,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsHalf_LessAtomics<HALFTHREAD> >(0,ignum),*this);
else //(neighflag == FULL)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsFull>(0,ignum),*this);
DeviceType::fence();
k_resize_bo.modify<DeviceType>();
k_resize_bo.sync<LMPHostType>();
@ -827,15 +823,11 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
// Bond order
if (neighflag == HALF) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder1>(0,ignum),*this);
DeviceType::fence();
} else if (neighflag == HALFTHREAD) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder1_LessAtomics>(0,ignum),*this);
DeviceType::fence();
}
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder2>(0,ignum),*this);
DeviceType::fence();
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder3>(0,ignum),*this);
DeviceType::fence();
// Bond energy
if (neighflag == HALF) {
@ -843,7 +835,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALF,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALF,0> >(0,inum),*this);
DeviceType::fence();
ev_all += ev;
pvector[0] = ev.evdwl;
} else { //if (neighflag == HALFTHREAD) {
@ -851,7 +842,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALFTHREAD,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALFTHREAD,0> >(0,inum),*this);
DeviceType::fence();
ev_all += ev;
pvector[0] = ev.evdwl;
}
@ -859,21 +849,17 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
// Multi-body corrections
if (neighflag == HALF) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti1<HALF,0> >(0,inum),*this);
DeviceType::fence();
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALF,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALF,0> >(0,inum),*this);
DeviceType::fence();
ev_all += ev;
} else { //if (neighflag == HALFTHREAD) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti1<HALFTHREAD,0> >(0,inum),*this);
DeviceType::fence();
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALFTHREAD,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALFTHREAD,0> >(0,inum),*this);
DeviceType::fence();
ev_all += ev;
}
pvector[2] = ev.ereax[0];
@ -887,14 +873,12 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALF,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALF,0> >(0,inum),*this);
DeviceType::fence();
ev_all += ev;
} else { //if (neighflag == HALFTHREAD) {
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALFTHREAD,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALFTHREAD,0> >(0,inum),*this);
DeviceType::fence();
ev_all += ev;
}
pvector[4] = ev.ereax[3];
@ -908,14 +892,12 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALF,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALF,0> >(0,inum),*this);
DeviceType::fence();
ev_all += ev;
} else { //if (neighflag == HALFTHREAD) {
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALFTHREAD,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALFTHREAD,0> >(0,inum),*this);
DeviceType::fence();
ev_all += ev;
}
pvector[8] = ev.ereax[6];
@ -929,14 +911,12 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALF,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALF,0> >(0,inum),*this);
DeviceType::fence();
ev_all += ev;
} else { //if (neighflag == HALFTHREAD) {
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALFTHREAD,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALFTHREAD,0> >(0,inum),*this);
DeviceType::fence();
ev_all += ev;
}
}
@ -946,22 +926,18 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
// Bond force
if (neighflag == HALF) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxUpdateBond<HALF> >(0,ignum),*this);
DeviceType::fence();
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALF,1> >(0,ignum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALF,0> >(0,ignum),*this);
DeviceType::fence();
ev_all += ev;
pvector[0] += ev.evdwl;
} else { //if (neighflag == HALFTHREAD) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxUpdateBond<HALFTHREAD> >(0,ignum),*this);
DeviceType::fence();
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALFTHREAD,1> >(0,ignum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALFTHREAD,0> >(0,ignum),*this);
DeviceType::fence();
ev_all += ev;
pvector[0] += ev.evdwl;
}
@ -3945,11 +3921,9 @@ void PairReaxCKokkos<DeviceType>::ev_setup(int eflag, int vflag)
if (vflag_global) for (i = 0; i < 6; i++) virial[i] = 0.0;
if (eflag_atom) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxZeroEAtom>(0,maxeatom),*this);
DeviceType::fence();
}
if (vflag_atom) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxZeroVAtom>(0,maxvatom),*this);
DeviceType::fence();
}
// if vflag_global = 2 and pair::compute() calls virial_fdotr_compute()
@ -4002,7 +3976,6 @@ void PairReaxCKokkos<DeviceType>::FindBond(int &numbonds)
{
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxFindBondZero>(0,nmax),*this);
DeviceType::fence();
bo_cut_bond = control->bg_cut;
@ -4017,7 +3990,6 @@ void PairReaxCKokkos<DeviceType>::FindBond(int &numbonds)
numbonds = 0;
PairReaxCKokkosFindBondFunctor<DeviceType> find_bond_functor(this);
Kokkos::parallel_reduce(inum,find_bond_functor,numbonds);
DeviceType::fence();
copymode = 0;
}
@ -4076,7 +4048,6 @@ void PairReaxCKokkos<DeviceType>::PackBondBuffer(DAT::tdual_ffloat_1d k_buf, int
nlocal = atomKK->nlocal;
PairReaxCKokkosPackBondBufferFunctor<DeviceType> pack_bond_buffer_functor(this);
Kokkos::parallel_scan(nlocal,pack_bond_buffer_functor);
DeviceType::fence();
copymode = 0;
k_buf.modify<DeviceType>();
@ -4135,11 +4106,9 @@ void PairReaxCKokkos<DeviceType>::FindBondSpecies()
{
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxFindBondSpeciesZero>(0,nmax),*this);
DeviceType::fence();
nlocal = atomKK->nlocal;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxFindBondSpecies>(0,nlocal),*this);
DeviceType::fence();
copymode = 0;
// NOTE: Could improve performance if a Kokkos version of ComputeSpecAtom is added

View File

@ -403,17 +403,14 @@ void PPPMKokkos<DeviceType>::setup()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_setup1>(nxlo_fft,nxhi_fft+1),*this);
DeviceType::fence();
copymode = 0;
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_setup2>(nylo_fft,nyhi_fft+1),*this);
DeviceType::fence();
copymode = 0;
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_setup3>(nzlo_fft,nzhi_fft+1),*this);
DeviceType::fence();
copymode = 0;
// merge three outer loops into one for better threading
@ -425,7 +422,6 @@ void PPPMKokkos<DeviceType>::setup()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_setup4>(0,inum_fft),*this);
DeviceType::fence();
copymode = 0;
compute_gf_ik();
@ -753,7 +749,6 @@ void PPPMKokkos<DeviceType>::compute(int eflag, int vflag)
if (eflag_atom) {
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_self1>(0,nlocal),*this);
DeviceType::fence();
copymode = 0;
//for (i = nlocal; i < ntotal; i++) d_eatom[i] *= 0.5*qscale;
}
@ -761,7 +756,6 @@ void PPPMKokkos<DeviceType>::compute(int eflag, int vflag)
if (vflag_atom) {
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_self2>(0,ntotal),*this);
DeviceType::fence();
copymode = 0;
}
}
@ -1415,7 +1409,6 @@ void PPPMKokkos<DeviceType>::compute_gf_ik()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_compute_gf_ik>(0,inum_fft),*this);
DeviceType::fence();
copymode = 0;
}
@ -1495,7 +1488,6 @@ void PPPMKokkos<DeviceType>::compute_gf_ik_triclinic()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_compute_gf_ik_triclinic>(nzlo_fft,nzhi_fft+1),*this);
DeviceType::fence();
copymode = 0;
}
@ -1588,7 +1580,6 @@ void PPPMKokkos<DeviceType>::particle_map()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_particle_map>(0,nlocal),*this);
DeviceType::fence();
copymode = 0;
k_flag.template modify<DeviceType>();
@ -1641,7 +1632,6 @@ void PPPMKokkos<DeviceType>::make_rho()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_make_rho_zero>(0,inum_out),*this);
DeviceType::fence();
copymode = 0;
// loop over my charges, add their contribution to nearby grid points
@ -1654,7 +1644,6 @@ void PPPMKokkos<DeviceType>::make_rho()
#ifdef KOKKOS_HAVE_CUDA
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_make_rho_atomic>(0,nlocal),*this);
DeviceType::fence();
copymode = 0;
#else
ix = nxhi_out-nxlo_out + 1;
@ -1663,7 +1652,6 @@ void PPPMKokkos<DeviceType>::make_rho()
copymode = 1;
Kokkos::TeamPolicy<DeviceType, TagPPPM_make_rho> config(lmp->kokkos->num_threads,1);
Kokkos::parallel_for(config,*this);
DeviceType::fence();
copymode = 0;
#endif
}
@ -1794,7 +1782,6 @@ void PPPMKokkos<DeviceType>::brick2fft()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_brick2fft>(0,inum_inout),*this);
DeviceType::fence();
copymode = 0;
k_density_fft.template modify<DeviceType>();
@ -1842,7 +1829,6 @@ void PPPMKokkos<DeviceType>::poisson_ik()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik1>(0,nfft),*this);
DeviceType::fence();
copymode = 0;
k_work1.template modify<DeviceType>();
@ -1862,14 +1848,12 @@ void PPPMKokkos<DeviceType>::poisson_ik()
if (vflag_global) {
copymode = 1;
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik2>(0,nfft),*this,ev);
DeviceType::fence();
copymode = 0;
for (j = 0; j < 6; j++) virial[j] += ev.v[j];
energy += ev.ecoul;
} else {
copymode = 1;
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik3>(0,nfft),*this,ev);
DeviceType::fence();
copymode = 0;
energy += ev.ecoul;
}
@ -1880,7 +1864,6 @@ void PPPMKokkos<DeviceType>::poisson_ik()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik4>(0,nfft),*this);
DeviceType::fence();
copymode = 0;
// extra FFTs for per-atomKK energy/virial
@ -1914,7 +1897,6 @@ void PPPMKokkos<DeviceType>::poisson_ik()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik5>(0,inum_fft),*this);
DeviceType::fence();
copymode = 0;
k_work2.template modify<DeviceType>();
@ -1926,7 +1908,6 @@ void PPPMKokkos<DeviceType>::poisson_ik()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik6>(0,inum_inout),*this);
DeviceType::fence();
copymode = 0;
@ -1934,7 +1915,6 @@ void PPPMKokkos<DeviceType>::poisson_ik()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik7>(0,inum_fft),*this);
DeviceType::fence();
copymode = 0;
k_work2.template modify<DeviceType>();
@ -1946,14 +1926,12 @@ void PPPMKokkos<DeviceType>::poisson_ik()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik8>(0,inum_inout),*this);
DeviceType::fence();
copymode = 0;
// z direction gradient
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik9>(0,inum_fft),*this);
DeviceType::fence();
copymode = 0;
k_work2.template modify<DeviceType>();
@ -1965,7 +1943,6 @@ void PPPMKokkos<DeviceType>::poisson_ik()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik10>(0,inum_inout),*this);
DeviceType::fence();
copymode = 0;
}
@ -2215,7 +2192,6 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
if (eflag_atom) {
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom1>(0,nfft),*this);
DeviceType::fence();
copymode = 0;
k_work2.template modify<DeviceType>();
@ -2227,7 +2203,6 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom2>(0,inum_inout),*this);
DeviceType::fence();
copymode = 0;
}
@ -2238,7 +2213,6 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom3>(0,nfft),*this);
DeviceType::fence();
copymode = 0;
k_work2.template modify<DeviceType>();
@ -2250,13 +2224,11 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom4>(0,inum_inout),*this);
DeviceType::fence();
copymode = 0;
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom5>(0,nfft),*this);
DeviceType::fence();
copymode = 0;
k_work2.template modify<DeviceType>();
@ -2268,13 +2240,11 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom6>(0,inum_inout),*this);
DeviceType::fence();
copymode = 0;
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom7>(0,nfft),*this);
DeviceType::fence();
copymode = 0;
k_work2.template modify<DeviceType>();
@ -2286,12 +2256,10 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom8>(0,inum_inout),*this);
DeviceType::fence();
copymode = 0;
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom9>(0,nfft),*this);
DeviceType::fence();
copymode = 0;
k_work2.template modify<DeviceType>();
@ -2303,13 +2271,11 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom10>(0,inum_inout),*this);
DeviceType::fence();
copymode = 0;
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom11>(0,nfft),*this);
DeviceType::fence();
copymode = 0;
k_work2.template modify<DeviceType>();
@ -2321,13 +2287,11 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom12>(0,inum_inout),*this);
DeviceType::fence();
copymode = 0;
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom13>(0,nfft),*this);
DeviceType::fence();
copymode = 0;
k_work2.template modify<DeviceType>();
@ -2339,7 +2303,6 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom14>(0,inum_inout),*this);
DeviceType::fence();
copymode = 0;
}
@ -2545,7 +2508,6 @@ void PPPMKokkos<DeviceType>::fieldforce_ik()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_fieldforce_ik>(0,nlocal),*this);
DeviceType::fence();
copymode = 0;
}
@ -2606,7 +2568,6 @@ void PPPMKokkos<DeviceType>::fieldforce_peratom()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_fieldforce_peratom>(0,nlocal),*this);
DeviceType::fence();
copymode = 0;
}
@ -2682,12 +2643,10 @@ void PPPMKokkos<DeviceType>::pack_forward_kokkos(int flag, Kokkos::DualView<FFT_
if (flag == FORWARD_IK) {
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_pack_forward1>(0,nlist),*this);
DeviceType::fence();
copymode = 0;
} else if (flag == FORWARD_IK_PERATOM) {
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_pack_forward2>(0,nlist),*this);
DeviceType::fence();
copymode = 0;
}
}
@ -2740,12 +2699,10 @@ void PPPMKokkos<DeviceType>::unpack_forward_kokkos(int flag, Kokkos::DualView<FF
if (flag == FORWARD_IK) {
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_unpack_forward1>(0,nlist),*this);
DeviceType::fence();
copymode = 0;
} else if (flag == FORWARD_IK_PERATOM) {
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_unpack_forward2>(0,nlist),*this);
DeviceType::fence();
copymode = 0;
}
}
@ -2798,7 +2755,6 @@ void PPPMKokkos<DeviceType>::pack_reverse_kokkos(int flag, Kokkos::DualView<FFT_
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_pack_reverse>(0,nlist),*this);
DeviceType::fence();
copymode = 0;
}
@ -2829,7 +2785,6 @@ void PPPMKokkos<DeviceType>::unpack_reverse_kokkos(int flag, Kokkos::DualView<FF
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_unpack_reverse>(0,nlist),*this);
DeviceType::fence();
copymode = 0;
}
@ -2989,7 +2944,6 @@ void PPPMKokkos<DeviceType>::slabcorr()
double dipole = 0.0;
copymode = 1;
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_slabcorr1>(0,nlocal),*this,dipole);
DeviceType::fence();
copymode = 0;
// sum local contributions to get global dipole moment
@ -3003,7 +2957,6 @@ void PPPMKokkos<DeviceType>::slabcorr()
if (eflag_atom || fabs(qsum) > SMALL) {
copymode = 1;
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_slabcorr2>(0,nlocal),*this,dipole_r2);
DeviceType::fence();
copymode = 0;
// sum local contributions
@ -3027,7 +2980,6 @@ void PPPMKokkos<DeviceType>::slabcorr()
efact = qscale * MY_2PI/volume;
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_slabcorr3>(0,nlocal),*this);
DeviceType::fence();
copymode = 0;
}
@ -3037,7 +2989,6 @@ void PPPMKokkos<DeviceType>::slabcorr()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_slabcorr4>(0,nlocal),*this);
DeviceType::fence();
copymode = 0;
}
@ -3081,7 +3032,6 @@ int PPPMKokkos<DeviceType>::timing_1d(int n, double &time1d)
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_timing_zero>(0,2*nfft_both),*this);
DeviceType::fence();
copymode = 0;
MPI_Barrier(world);
@ -3119,7 +3069,6 @@ int PPPMKokkos<DeviceType>::timing_3d(int n, double &time3d)
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_timing_zero>(0,2*nfft_both),*this);
DeviceType::fence();
copymode = 0;
MPI_Barrier(world);