Add Kokkos thread fences to pack/unpack routines

This commit is contained in:
Stan Moore 2017-12-15 13:01:38 -07:00
parent d029cb9002
commit 68cf6941e1
2 changed files with 21 additions and 3 deletions

View File

@ -200,6 +200,7 @@ void CommKokkos::forward_comm_device(int dummy)
} }
n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist, n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,
iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]); iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);
DeviceType::fence();
if (n) { if (n) {
MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(), MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),
n,MPI_DOUBLE,sendproc[iswap],0,world); n,MPI_DOUBLE,sendproc[iswap],0,world);
@ -229,11 +230,13 @@ void CommKokkos::forward_comm_device(int dummy)
recvproc[iswap],0,world,&request); recvproc[iswap],0,world,&request);
n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap, n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap,
k_buf_send,pbc_flag[iswap],pbc[iswap]); k_buf_send,pbc_flag[iswap],pbc[iswap]);
DeviceType::fence();
if (n) if (n)
MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),n, MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),n,
MPI_DOUBLE,sendproc[iswap],0,world); MPI_DOUBLE,sendproc[iswap],0,world);
if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv); avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
DeviceType::fence();
} }
} else { } else {
@ -395,6 +398,7 @@ void CommKokkos::forward_comm_pair_device(Pair *pair)
n = pairKKBase->pack_forward_comm_kokkos(sendnum[iswap],k_sendlist, n = pairKKBase->pack_forward_comm_kokkos(sendnum[iswap],k_sendlist,
iswap,k_buf_send_pair,pbc_flag[iswap],pbc[iswap]); iswap,k_buf_send_pair,pbc_flag[iswap],pbc[iswap]);
DeviceType::fence();
// exchange with another proc // exchange with another proc
// if self, set recv buffer to send buffer // if self, set recv buffer to send buffer
@ -411,6 +415,7 @@ void CommKokkos::forward_comm_pair_device(Pair *pair)
// unpack buffer // unpack buffer
pairKKBase->unpack_forward_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv_pair); pairKKBase->unpack_forward_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv_pair);
DeviceType::fence();
} }
} }
@ -610,6 +615,7 @@ void CommKokkos::exchange_device()
k_exchange_sendlist,k_exchange_copylist, k_exchange_sendlist,k_exchange_copylist,
ExecutionSpaceFromDevice<DeviceType>:: ExecutionSpaceFromDevice<DeviceType>::
space,dim,lo,hi); space,dim,lo,hi);
DeviceType::fence();
} else { } else {
while (i < nlocal) { while (i < nlocal) {
@ -634,6 +640,7 @@ void CommKokkos::exchange_device()
atom->nlocal=avec-> atom->nlocal=avec->
unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi, unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi,
ExecutionSpaceFromDevice<DeviceType>::space); ExecutionSpaceFromDevice<DeviceType>::space);
DeviceType::fence();
} }
} else { } else {
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0, MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0,
@ -666,6 +673,7 @@ void CommKokkos::exchange_device()
atom->nlocal = avec-> atom->nlocal = avec->
unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi, unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi,
ExecutionSpaceFromDevice<DeviceType>::space); ExecutionSpaceFromDevice<DeviceType>::space);
DeviceType::fence();
} }
} }
@ -925,11 +933,14 @@ void CommKokkos::borders_device() {
"implemented with Kokkos"); "implemented with Kokkos");
n = avec->pack_border_vel(nsend,sendlist[iswap],buf_send, n = avec->pack_border_vel(nsend,sendlist[iswap],buf_send,
pbc_flag[iswap],pbc[iswap]); pbc_flag[iswap],pbc[iswap]);
DeviceType::fence();
} }
else else {
n = avec-> n = avec->
pack_border_kokkos(nsend,k_sendlist,k_buf_send,iswap, pack_border_kokkos(nsend,k_sendlist,k_buf_send,iswap,
pbc_flag[iswap],pbc[iswap],exec_space); pbc_flag[iswap],pbc[iswap],exec_space);
DeviceType::fence();
}
// swap atoms with other proc // swap atoms with other proc
// no MPI calls except SendRecv if nsend/nrecv = 0 // no MPI calls except SendRecv if nsend/nrecv = 0
@ -960,12 +971,15 @@ void CommKokkos::borders_device() {
avec->unpack_border_vel(nrecv,atom->nlocal+atom->nghost,buf); avec->unpack_border_vel(nrecv,atom->nlocal+atom->nghost,buf);
} }
else else
if (sendproc[iswap] != me) if (sendproc[iswap] != me) {
avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost, avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost,
k_buf_recv,exec_space); k_buf_recv,exec_space);
else DeviceType::fence();
} else {
avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost, avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost,
k_buf_send,exec_space); k_buf_send,exec_space);
DeviceType::fence();
}
// set all pointers & counters // set all pointers & counters

View File

@ -523,6 +523,7 @@ void GridCommKokkos<DeviceType>::forward_comm(KSpace *kspace, int which)
kspaceKKBase->pack_forward_kspace_kokkos(which,k_buf2,swap[m].npack,k_packlist,m); kspaceKKBase->pack_forward_kspace_kokkos(which,k_buf2,swap[m].npack,k_packlist,m);
else else
kspaceKKBase->pack_forward_kspace_kokkos(which,k_buf1,swap[m].npack,k_packlist,m); kspaceKKBase->pack_forward_kspace_kokkos(which,k_buf1,swap[m].npack,k_packlist,m);
DeviceType::fence();
if (swap[m].sendproc != me) { if (swap[m].sendproc != me) {
MPI_Irecv(k_buf2.view<DeviceType>().ptr_on_device(),nforward*swap[m].nunpack,MPI_FFT_SCALAR, MPI_Irecv(k_buf2.view<DeviceType>().ptr_on_device(),nforward*swap[m].nunpack,MPI_FFT_SCALAR,
@ -533,6 +534,7 @@ void GridCommKokkos<DeviceType>::forward_comm(KSpace *kspace, int which)
} }
kspaceKKBase->unpack_forward_kspace_kokkos(which,k_buf2,swap[m].nunpack,k_unpacklist,m); kspaceKKBase->unpack_forward_kspace_kokkos(which,k_buf2,swap[m].nunpack,k_unpacklist,m);
DeviceType::fence();
} }
} }
@ -554,6 +556,7 @@ void GridCommKokkos<DeviceType>::reverse_comm(KSpace *kspace, int which)
kspaceKKBase->pack_reverse_kspace_kokkos(which,k_buf2,swap[m].nunpack,k_unpacklist,m); kspaceKKBase->pack_reverse_kspace_kokkos(which,k_buf2,swap[m].nunpack,k_unpacklist,m);
else else
kspaceKKBase->pack_reverse_kspace_kokkos(which,k_buf1,swap[m].nunpack,k_unpacklist,m); kspaceKKBase->pack_reverse_kspace_kokkos(which,k_buf1,swap[m].nunpack,k_unpacklist,m);
DeviceType::fence();
if (swap[m].recvproc != me) { if (swap[m].recvproc != me) {
MPI_Irecv(k_buf2.view<DeviceType>().ptr_on_device(),nreverse*swap[m].npack,MPI_FFT_SCALAR, MPI_Irecv(k_buf2.view<DeviceType>().ptr_on_device(),nreverse*swap[m].npack,MPI_FFT_SCALAR,
@ -564,6 +567,7 @@ void GridCommKokkos<DeviceType>::reverse_comm(KSpace *kspace, int which)
} }
kspaceKKBase->unpack_reverse_kspace_kokkos(which,k_buf2,swap[m].npack,k_packlist,m); kspaceKKBase->unpack_reverse_kspace_kokkos(which,k_buf2,swap[m].npack,k_packlist,m);
DeviceType::fence();
} }
} }