forked from lijiext/lammps
Merge pull request #536 from akohlmey/fix-nvcc-openmp-conflicts
Implement workaround for NVCC incompatibilities with OpenMP directives
This commit is contained in:
commit
326a8a1289
|
@ -484,7 +484,7 @@ double PairCombOMP::yasu_char(double *qf_fix, int &igroup)
|
||||||
|
|
||||||
qfo_field(&params[iparam_ij],rsq1,iq,jq,fqji,fqjj);
|
qfo_field(&params[iparam_ij],rsq1,iq,jq,fqji,fqjj);
|
||||||
fqi += jq * fqij + fqji;
|
fqi += jq * fqij + fqji;
|
||||||
#if defined(_OPENMP)
|
#if defined(_OPENMP) && !defined(__NVCC__)
|
||||||
#pragma omp atomic
|
#pragma omp atomic
|
||||||
#endif
|
#endif
|
||||||
qf[j] += (iq * fqij + fqjj);
|
qf[j] += (iq * fqij + fqjj);
|
||||||
|
@ -511,13 +511,13 @@ double PairCombOMP::yasu_char(double *qf_fix, int &igroup)
|
||||||
|
|
||||||
qfo_short(&params[iparam_ij],i,nj,rsq1,iq,jq,fqij,fqjj);
|
qfo_short(&params[iparam_ij],i,nj,rsq1,iq,jq,fqij,fqjj);
|
||||||
fqi += fqij;
|
fqi += fqij;
|
||||||
#if defined(_OPENMP)
|
#if defined(_OPENMP) && !defined(__NVCC__)
|
||||||
#pragma omp atomic
|
#pragma omp atomic
|
||||||
#endif
|
#endif
|
||||||
qf[j] += fqjj;
|
qf[j] += fqjj;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(_OPENMP)
|
#if defined(_OPENMP) && !defined(__NVCC__)
|
||||||
#pragma omp atomic
|
#pragma omp atomic
|
||||||
#endif
|
#endif
|
||||||
qf[i] += fqi;
|
qf[i] += fqi;
|
||||||
|
|
|
@ -69,8 +69,10 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control,
|
||||||
double total_Econ = 0;
|
double total_Econ = 0;
|
||||||
int nthreads = control->nthreads;
|
int nthreads = control->nthreads;
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
#pragma omp parallel default(shared) reduction(+: total_Etor, total_Econ)
|
#pragma omp parallel default(shared) reduction(+: total_Etor, total_Econ)
|
||||||
{
|
#endif
|
||||||
|
{
|
||||||
int i, j, k, l, pi, pj, pk, pl, pij, plk;
|
int i, j, k, l, pi, pj, pk, pl, pij, plk;
|
||||||
int type_i, type_j, type_k, type_l;
|
int type_i, type_j, type_k, type_l;
|
||||||
int start_j, end_j;
|
int start_j, end_j;
|
||||||
|
@ -125,7 +127,9 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control,
|
||||||
system->N, system->pair_ptr->eatom,
|
system->N, system->pair_ptr->eatom,
|
||||||
system->pair_ptr->vatom, thr);
|
system->pair_ptr->vatom, thr);
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
#pragma omp for schedule(static)
|
#pragma omp for schedule(static)
|
||||||
|
#endif
|
||||||
for (j = 0; j < system->N; ++j) {
|
for (j = 0; j < system->N; ++j) {
|
||||||
start_j = Start_Index(j, bonds);
|
start_j = Start_Index(j, bonds);
|
||||||
end_j = End_Index(j, bonds);
|
end_j = End_Index(j, bonds);
|
||||||
|
@ -137,7 +141,9 @@ void Torsion_AnglesOMP( reax_system *system, control_params *control,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
#pragma omp for schedule(dynamic,50)
|
#pragma omp for schedule(dynamic,50)
|
||||||
|
#endif
|
||||||
for (j = 0; j < natoms; ++j) {
|
for (j = 0; j < natoms; ++j) {
|
||||||
type_j = system->my_atoms[j].type;
|
type_j = system->my_atoms[j].type;
|
||||||
Delta_j = workspace->Delta_boc[j];
|
Delta_j = workspace->Delta_boc[j];
|
||||||
|
|
|
@ -124,8 +124,9 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,
|
||||||
int nthreads = control->nthreads;
|
int nthreads = control->nthreads;
|
||||||
int num_thb_intrs = 0;
|
int num_thb_intrs = 0;
|
||||||
int TWICE = 2;
|
int TWICE = 2;
|
||||||
|
#if defined(_OPENMP)
|
||||||
#pragma omp parallel default(shared) reduction(+:total_Eang, total_Epen, total_Ecoa, num_thb_intrs)
|
#pragma omp parallel default(shared) reduction(+:total_Eang, total_Epen, total_Ecoa, num_thb_intrs)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
int i, j, pi, k, pk, t;
|
int i, j, pi, k, pk, t;
|
||||||
int type_i, type_j, type_k;
|
int type_i, type_j, type_k;
|
||||||
|
@ -180,7 +181,9 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,
|
||||||
|
|
||||||
const int per_thread = thb_intrs->num_intrs / nthreads;
|
const int per_thread = thb_intrs->num_intrs / nthreads;
|
||||||
|
|
||||||
|
#if defined(_OPENMP)
|
||||||
#pragma omp for schedule(dynamic,50)
|
#pragma omp for schedule(dynamic,50)
|
||||||
|
#endif
|
||||||
for (j = 0; j < system->N; ++j) {
|
for (j = 0; j < system->N; ++j) {
|
||||||
type_j = system->my_atoms[j].type;
|
type_j = system->my_atoms[j].type;
|
||||||
_my_offset[j] = 0;
|
_my_offset[j] = 0;
|
||||||
|
@ -251,11 +254,14 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,
|
||||||
} // for(j)
|
} // for(j)
|
||||||
|
|
||||||
// Wait for all threads to finish counting angles
|
// Wait for all threads to finish counting angles
|
||||||
|
#if defined(_OPENMP) && !defined(__NVCC__)
|
||||||
#pragma omp barrier
|
#pragma omp barrier
|
||||||
|
#endif
|
||||||
// Master thread uses angle counts to compute offsets
|
// Master thread uses angle counts to compute offsets
|
||||||
// This can be threaded
|
// This can be threaded
|
||||||
|
#if defined(_OPENMP) && !defined(__NVCC__)
|
||||||
#pragma omp master
|
#pragma omp master
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
int current_count = 0;
|
int current_count = 0;
|
||||||
int m = _my_offset[0];
|
int m = _my_offset[0];
|
||||||
|
@ -269,12 +275,15 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,
|
||||||
}
|
}
|
||||||
|
|
||||||
// All threads wait till master thread finished computing offsets
|
// All threads wait till master thread finished computing offsets
|
||||||
|
#if defined(_OPENMP) && !defined(__NVCC__)
|
||||||
#pragma omp barrier
|
#pragma omp barrier
|
||||||
|
#endif
|
||||||
// Original loop, but now using precomputed offsets
|
// Original loop, but now using precomputed offsets
|
||||||
// Safe to use all threads available, regardless of threads tasked above
|
// Safe to use all threads available, regardless of threads tasked above
|
||||||
// We also now skip over atoms that have no angles assigned
|
// We also now skip over atoms that have no angles assigned
|
||||||
|
#if defined(_OPENMP)
|
||||||
#pragma omp for schedule(dynamic,50)//(dynamic,chunksize)//(guided)
|
#pragma omp for schedule(dynamic,50)//(dynamic,chunksize)//(guided)
|
||||||
|
#endif
|
||||||
for (j = 0; j < system->N; ++j) { // Ray: the first one with system->N
|
for (j = 0; j < system->N; ++j) { // Ray: the first one with system->N
|
||||||
type_j = system->my_atoms[j].type;
|
type_j = system->my_atoms[j].type;
|
||||||
if(type_j < 0) continue;
|
if(type_j < 0) continue;
|
||||||
|
|
Loading…
Reference in New Issue