Merge pull request #536 from akohlmey/fix-nvcc-openmp-conflicts

Implement workaround for NVCC incompatibilities with OpenMP directives
This commit is contained in:
sjplimp 2017-06-20 07:44:40 -06:00 committed by GitHub
commit 326a8a1289
3 changed files with 22 additions and 7 deletions

View File

@ -484,7 +484,7 @@ double PairCombOMP::yasu_char(double *qf_fix, int &igroup)
qfo_field(&params[iparam_ij],rsq1,iq,jq,fqji,fqjj);
fqi += jq * fqij + fqji;
#if defined(_OPENMP)
#if defined(_OPENMP) && !defined(__NVCC__)
#pragma omp atomic
#endif
qf[j] += (iq * fqij + fqjj);
@ -511,13 +511,13 @@ double PairCombOMP::yasu_char(double *qf_fix, int &igroup)
qfo_short(&params[iparam_ij],i,nj,rsq1,iq,jq,fqij,fqjj);
fqi += fqij;
#if defined(_OPENMP)
#if defined(_OPENMP) && !defined(__NVCC__)
#pragma omp atomic
#endif
qf[j] += fqjj;
}
#if defined(_OPENMP)
#if defined(_OPENMP) && !defined(__NVCC__)
#pragma omp atomic
#endif
qf[i] += fqi;

View File

@ -69,7 +69,9 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control,
double total_Econ = 0;
int nthreads = control->nthreads;
#if defined(_OPENMP)
#pragma omp parallel default(shared) reduction(+: total_Etor, total_Econ)
#endif
{
int i, j, k, l, pi, pj, pk, pl, pij, plk;
int type_i, type_j, type_k, type_l;
@ -125,7 +127,9 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control,
system->N, system->pair_ptr->eatom,
system->pair_ptr->vatom, thr);
#if defined(_OPENMP)
#pragma omp for schedule(static)
#endif
for (j = 0; j < system->N; ++j) {
start_j = Start_Index(j, bonds);
end_j = End_Index(j, bonds);
@ -137,7 +141,9 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control,
}
}
#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50)
#endif
for (j = 0; j < natoms; ++j) {
type_j = system->my_atoms[j].type;
Delta_j = workspace->Delta_boc[j];

View File

@ -124,8 +124,9 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,
int nthreads = control->nthreads;
int num_thb_intrs = 0;
int TWICE = 2;
#if defined(_OPENMP)
#pragma omp parallel default(shared) reduction(+:total_Eang, total_Epen, total_Ecoa, num_thb_intrs)
#endif
{
int i, j, pi, k, pk, t;
int type_i, type_j, type_k;
@ -180,7 +181,9 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,
const int per_thread = thb_intrs->num_intrs / nthreads;
#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50)
#endif
for (j = 0; j < system->N; ++j) {
type_j = system->my_atoms[j].type;
_my_offset[j] = 0;
@ -251,11 +254,14 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,
} // for(j)
// Wait for all threads to finish counting angles
#if defined(_OPENMP) && !defined(__NVCC__)
#pragma omp barrier
#endif
// Master thread uses angle counts to compute offsets
// This can be threaded
#if defined(_OPENMP) && !defined(__NVCC__)
#pragma omp master
#endif
{
int current_count = 0;
int m = _my_offset[0];
@ -269,12 +275,15 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,
}
// All threads wait until the master thread has finished computing offsets
#if defined(_OPENMP) && !defined(__NVCC__)
#pragma omp barrier
#endif
// Original loop, but now using precomputed offsets
// Safe to use all threads available, regardless of threads tasked above
// We also now skip over atoms that have no angles assigned
#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50)//(dynamic,chunksize)//(guided)
#endif
for (j = 0; j < system->N; ++j) { // Ray: the first one with system->N
type_j = system->my_atoms[j].type;
if(type_j < 0) continue;