Merge pull request #536 from akohlmey/fix-nvcc-openmp-conflicts

Implement workaround for NVCC incompatibilities with OpenMP directives
This commit is contained in:
sjplimp 2017-06-20 07:44:40 -06:00 committed by GitHub
commit 326a8a1289
3 changed files with 22 additions and 7 deletions

View File

@@ -484,7 +484,7 @@ double PairCombOMP::yasu_char(double *qf_fix, int &igroup)
qfo_field(&params[iparam_ij],rsq1,iq,jq,fqji,fqjj); qfo_field(&params[iparam_ij],rsq1,iq,jq,fqji,fqjj);
fqi += jq * fqij + fqji; fqi += jq * fqij + fqji;
#if defined(_OPENMP) #if defined(_OPENMP) && !defined(__NVCC__)
#pragma omp atomic #pragma omp atomic
#endif #endif
qf[j] += (iq * fqij + fqjj); qf[j] += (iq * fqij + fqjj);
@@ -511,13 +511,13 @@ double PairCombOMP::yasu_char(double *qf_fix, int &igroup)
qfo_short(&params[iparam_ij],i,nj,rsq1,iq,jq,fqij,fqjj); qfo_short(&params[iparam_ij],i,nj,rsq1,iq,jq,fqij,fqjj);
fqi += fqij; fqi += fqij;
#if defined(_OPENMP) #if defined(_OPENMP) && !defined(__NVCC__)
#pragma omp atomic #pragma omp atomic
#endif #endif
qf[j] += fqjj; qf[j] += fqjj;
} }
#if defined(_OPENMP) #if defined(_OPENMP) && !defined(__NVCC__)
#pragma omp atomic #pragma omp atomic
#endif #endif
qf[i] += fqi; qf[i] += fqi;

View File

@@ -69,8 +69,10 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control,
double total_Econ = 0; double total_Econ = 0;
int nthreads = control->nthreads; int nthreads = control->nthreads;
#if defined(_OPENMP)
#pragma omp parallel default(shared) reduction(+: total_Etor, total_Econ) #pragma omp parallel default(shared) reduction(+: total_Etor, total_Econ)
{ #endif
{
int i, j, k, l, pi, pj, pk, pl, pij, plk; int i, j, k, l, pi, pj, pk, pl, pij, plk;
int type_i, type_j, type_k, type_l; int type_i, type_j, type_k, type_l;
int start_j, end_j; int start_j, end_j;
@@ -125,7 +127,9 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control,
system->N, system->pair_ptr->eatom, system->N, system->pair_ptr->eatom,
system->pair_ptr->vatom, thr); system->pair_ptr->vatom, thr);
#if defined(_OPENMP)
#pragma omp for schedule(static) #pragma omp for schedule(static)
#endif
for (j = 0; j < system->N; ++j) { for (j = 0; j < system->N; ++j) {
start_j = Start_Index(j, bonds); start_j = Start_Index(j, bonds);
end_j = End_Index(j, bonds); end_j = End_Index(j, bonds);
@@ -137,7 +141,9 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control,
} }
} }
#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50) #pragma omp for schedule(dynamic,50)
#endif
for (j = 0; j < natoms; ++j) { for (j = 0; j < natoms; ++j) {
type_j = system->my_atoms[j].type; type_j = system->my_atoms[j].type;
Delta_j = workspace->Delta_boc[j]; Delta_j = workspace->Delta_boc[j];

View File

@@ -124,8 +124,9 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,
int nthreads = control->nthreads; int nthreads = control->nthreads;
int num_thb_intrs = 0; int num_thb_intrs = 0;
int TWICE = 2; int TWICE = 2;
#if defined(_OPENMP)
#pragma omp parallel default(shared) reduction(+:total_Eang, total_Epen, total_Ecoa, num_thb_intrs) #pragma omp parallel default(shared) reduction(+:total_Eang, total_Epen, total_Ecoa, num_thb_intrs)
#endif
{ {
int i, j, pi, k, pk, t; int i, j, pi, k, pk, t;
int type_i, type_j, type_k; int type_i, type_j, type_k;
@@ -180,7 +181,9 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,
const int per_thread = thb_intrs->num_intrs / nthreads; const int per_thread = thb_intrs->num_intrs / nthreads;
#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50) #pragma omp for schedule(dynamic,50)
#endif
for (j = 0; j < system->N; ++j) { for (j = 0; j < system->N; ++j) {
type_j = system->my_atoms[j].type; type_j = system->my_atoms[j].type;
_my_offset[j] = 0; _my_offset[j] = 0;
@@ -251,11 +254,14 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,
} // for(j) } // for(j)
// Wait for all threads to finish counting angles // Wait for all threads to finish counting angles
#if defined(_OPENMP) && !defined(__NVCC__)
#pragma omp barrier #pragma omp barrier
#endif
// Master thread uses angle counts to compute offsets // Master thread uses angle counts to compute offsets
// This can be threaded // This can be threaded
#if defined(_OPENMP) && !defined(__NVCC__)
#pragma omp master #pragma omp master
#endif
{ {
int current_count = 0; int current_count = 0;
int m = _my_offset[0]; int m = _my_offset[0];
@@ -269,12 +275,15 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,
} }
// All threads wait till master thread finished computing offsets // All threads wait till master thread finished computing offsets
#if defined(_OPENMP) && !defined(__NVCC__)
#pragma omp barrier #pragma omp barrier
#endif
// Original loop, but now using precomputed offsets // Original loop, but now using precomputed offsets
// Safe to use all threads available, regardless of threads tasked above // Safe to use all threads available, regardless of threads tasked above
// We also now skip over atoms that have no angles assigned // We also now skip over atoms that have no angles assigned
#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50)//(dynamic,chunksize)//(guided) #pragma omp for schedule(dynamic,50)//(dynamic,chunksize)//(guided)
#endif
for (j = 0; j < system->N; ++j) { // Ray: the first one with system->N for (j = 0; j < system->N; ++j) { // Ray: the first one with system->N
type_j = system->my_atoms[j].type; type_j = system->my_atoms[j].type;
if(type_j < 0) continue; if(type_j < 0) continue;