Merge pull request #536 from akohlmey/fix-nvcc-openmp-conflicts

Implement workaround for NVCC incompatibilities with OpenMP directives
2017-06-20 07:44:40 -06:00 · 2017-06-20 07:44:40 -06:00 · 326a8a1289
parent b5300724bb 164cedf353
commit 326a8a1289
3 changed files with 22 additions and 7 deletions
--- a/src/USER-OMP/pair_comb_omp.cpp
+++ b/src/USER-OMP/pair_comb_omp.cpp
@@ -484,7 +484,7 @@ double PairCombOMP::yasu_char(double *qf_fix, int &igroup)

        qfo_field(&params[iparam_ij],rsq1,iq,jq,fqji,fqjj);
        fqi   += jq * fqij + fqji;
-#if defined(_OPENMP)
+#if defined(_OPENMP) && !defined(__NVCC__)
 #pragma omp atomic
 #endif
        qf[j] += (iq * fqij + fqjj);
@@ -511,13 +511,13 @@ double PairCombOMP::yasu_char(double *qf_fix, int &igroup)

        qfo_short(&params[iparam_ij],i,nj,rsq1,iq,jq,fqij,fqjj);
        fqi += fqij;
-#if defined(_OPENMP)
+#if defined(_OPENMP) && !defined(__NVCC__)
 #pragma omp atomic
 #endif
        qf[j] += fqjj;
      }

-#if defined(_OPENMP)
+#if defined(_OPENMP) && !defined(__NVCC__)
 #pragma omp atomic
 #endif
      qf[i] += fqi;
--- a/src/USER-OMP/reaxc_torsion_angles_omp.cpp
+++ b/src/USER-OMP/reaxc_torsion_angles_omp.cpp
@@ -69,7 +69,9 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control,
  double total_Econ = 0;
  int  nthreads = control->nthreads;

+#if defined(_OPENMP)
 #pragma omp parallel default(shared) reduction(+: total_Etor, total_Econ)
+#endif
  {
  int i, j, k, l, pi, pj, pk, pl, pij, plk;
  int type_i, type_j, type_k, type_l;
@@ -125,7 +127,9 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control,
                                    system->N, system->pair_ptr->eatom,
                                    system->pair_ptr->vatom, thr);

+#if defined(_OPENMP)
 #pragma omp for schedule(static)
+#endif
  for (j = 0; j < system->N; ++j) {
    start_j = Start_Index(j, bonds);
    end_j = End_Index(j, bonds);
@@ -137,7 +141,9 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control,
    }
  }

+#if defined(_OPENMP)
 #pragma omp for schedule(dynamic,50)
+#endif
  for (j = 0; j < natoms; ++j) {
    type_j = system->my_atoms[j].type;
    Delta_j = workspace->Delta_boc[j];
--- a/src/USER-OMP/reaxc_valence_angles_omp.cpp
+++ b/src/USER-OMP/reaxc_valence_angles_omp.cpp
@@ -124,8 +124,9 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,
  int  nthreads = control->nthreads;
  int  num_thb_intrs = 0;
  int  TWICE = 2;
-
+#if defined(_OPENMP)
 #pragma omp parallel default(shared) reduction(+:total_Eang, total_Epen, total_Ecoa, num_thb_intrs)
+#endif
  {
    int i, j, pi, k, pk, t;
    int type_i, type_j, type_k;
@@ -180,7 +181,9 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,

    const int per_thread = thb_intrs->num_intrs / nthreads;

+#if defined(_OPENMP)
 #pragma omp for schedule(dynamic,50)
+#endif
    for (j = 0; j < system->N; ++j) {
      type_j = system->my_atoms[j].type;
      _my_offset[j] = 0;
@@ -251,11 +254,14 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,
    } // for(j)

    // Wait for all threads to finish counting angles
+#if defined(_OPENMP) && !defined(__NVCC__)
 #pragma omp barrier
-
+#endif
    // Master thread uses angle counts to compute offsets
    // This can be threaded
+#if defined(_OPENMP) && !defined(__NVCC__)
 #pragma omp master
+#endif
    {
      int current_count = 0;
      int m = _my_offset[0];
@@ -269,12 +275,15 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,
    }

    // All threads wait till master thread finished computing offsets
+#if defined(_OPENMP) && !defined(__NVCC__)
 #pragma omp barrier
-
+#endif
    // Original loop, but now using precomputed offsets
    // Safe to use all threads available, regardless of threads tasked above
    // We also now skip over atoms that have no angles assigned
+#if defined(_OPENMP)
 #pragma omp for schedule(dynamic,50)//(dynamic,chunksize)//(guided)
+#endif
    for (j = 0; j < system->N; ++j) {         // Ray: the first one with system->N
      type_j = system->my_atoms[j].type;
      if(type_j < 0) continue;