forked from lijiext/lammps
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12406 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
f6c0d4fec3
commit
777e82995d
|
@ -81,7 +81,7 @@ void PairEAMOpt::eval()
|
|||
|
||||
int i,j,ii,jj,inum,jnum,itype,jtype;
|
||||
double evdwl = 0.0;
|
||||
double* __restrict__ coeff;
|
||||
double* _noalias coeff;
|
||||
|
||||
// grow energy array if necessary
|
||||
|
||||
|
@ -93,13 +93,13 @@ void PairEAMOpt::eval()
|
|||
fp = (double *) memory->smalloc(nmax*sizeof(double),"pair:fp");
|
||||
}
|
||||
|
||||
double** __restrict__ x = atom->x;
|
||||
double** __restrict__ f = atom->f;
|
||||
int* __restrict__ type = atom->type;
|
||||
double** _noalias x = atom->x;
|
||||
double** _noalias f = atom->f;
|
||||
int* _noalias type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
vec3_t* __restrict__ xx = (vec3_t*)x[0];
|
||||
vec3_t* __restrict__ ff = (vec3_t*)f[0];
|
||||
vec3_t* _noalias xx = (vec3_t*)x[0];
|
||||
vec3_t* _noalias ff = (vec3_t*)f[0];
|
||||
|
||||
double tmp_cutforcesq = cutforcesq;
|
||||
double tmp_rdr = rdr;
|
||||
|
@ -107,17 +107,17 @@ void PairEAMOpt::eval()
|
|||
int nr1 = nr-1;
|
||||
|
||||
inum = list->inum;
|
||||
int* __restrict__ ilist = list->ilist;
|
||||
int** __restrict__ firstneigh = list->firstneigh;
|
||||
int* __restrict__ numneigh = list->numneigh;
|
||||
int* _noalias ilist = list->ilist;
|
||||
int** _noalias firstneigh = list->firstneigh;
|
||||
int* _noalias numneigh = list->numneigh;
|
||||
|
||||
int ntypes = atom->ntypes;
|
||||
int ntypes2 = ntypes*ntypes;
|
||||
|
||||
fast_alpha_t* __restrict__ fast_alpha =
|
||||
fast_alpha_t* _noalias fast_alpha =
|
||||
(fast_alpha_t*) malloc(ntypes2*(nr+1)*sizeof(fast_alpha_t));
|
||||
for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) {
|
||||
fast_alpha_t* __restrict__ tab = &fast_alpha[i*ntypes*nr+j*nr];
|
||||
fast_alpha_t* _noalias tab = &fast_alpha[i*ntypes*nr+j*nr];
|
||||
if (type2rhor[i+1][j+1] >= 0) {
|
||||
for(int m = 1; m <= nr; m++) {
|
||||
tab[m].rhor0i = rhor_spline[type2rhor[i+1][j+1]][m][6];
|
||||
|
@ -135,12 +135,12 @@ void PairEAMOpt::eval()
|
|||
}
|
||||
}
|
||||
}
|
||||
fast_alpha_t* __restrict__ tabeight = fast_alpha;
|
||||
fast_alpha_t* _noalias tabeight = fast_alpha;
|
||||
|
||||
fast_gamma_t* __restrict__ fast_gamma =
|
||||
fast_gamma_t* _noalias fast_gamma =
|
||||
(fast_gamma_t*) malloc(ntypes2*(nr+1)*sizeof(fast_gamma_t));
|
||||
for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) {
|
||||
fast_gamma_t* __restrict__ tab = &fast_gamma[i*ntypes*nr+j*nr];
|
||||
fast_gamma_t* _noalias tab = &fast_gamma[i*ntypes*nr+j*nr];
|
||||
if (type2rhor[i+1][j+1] >= 0) {
|
||||
for(int m = 1; m <= nr; m++) {
|
||||
tab[m].rhor4i = rhor_spline[type2rhor[i+1][j+1]][m][2];
|
||||
|
@ -168,7 +168,7 @@ void PairEAMOpt::eval()
|
|||
}
|
||||
}
|
||||
}
|
||||
fast_gamma_t* __restrict__ tabss = fast_gamma;
|
||||
fast_gamma_t* _noalias tabss = fast_gamma;
|
||||
|
||||
// zero out density
|
||||
|
||||
|
@ -188,11 +188,11 @@ void PairEAMOpt::eval()
|
|||
double ytmp = xx[i].y;
|
||||
double ztmp = xx[i].z;
|
||||
itype = type[i] - 1;
|
||||
int* __restrict__ jlist = firstneigh[i];
|
||||
int* _noalias jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
double tmprho = rho[i];
|
||||
fast_alpha_t* __restrict__ tabeighti = &tabeight[itype*ntypes*nr];
|
||||
fast_alpha_t* _noalias tabeighti = &tabeight[itype*ntypes*nr];
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
|
@ -265,14 +265,14 @@ void PairEAMOpt::eval()
|
|||
double ytmp = xx[i].y;
|
||||
double ztmp = xx[i].z;
|
||||
int itype1 = type[i] - 1;
|
||||
int* __restrict__ jlist = firstneigh[i];
|
||||
int* _noalias jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
double tmpfx = 0.0;
|
||||
double tmpfy = 0.0;
|
||||
double tmpfz = 0.0;
|
||||
|
||||
fast_gamma_t* __restrict__ tabssi = &tabss[itype1*ntypes*nr];
|
||||
fast_gamma_t* _noalias tabssi = &tabss[itype1*ntypes*nr];
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
|
|
|
@ -84,22 +84,22 @@ void PairLJCharmmCoulLongOpt::eval()
|
|||
double evdwl = 0.0;
|
||||
double ecoul = 0.0;
|
||||
|
||||
double** __restrict__ x = atom->x;
|
||||
double** __restrict__ f = atom->f;
|
||||
double* __restrict__ q = atom->q;
|
||||
int* __restrict__ type = atom->type;
|
||||
double** _noalias x = atom->x;
|
||||
double** _noalias f = atom->f;
|
||||
double* _noalias q = atom->q;
|
||||
int* _noalias type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
double* __restrict__ special_coul = force->special_coul;
|
||||
double* __restrict__ special_lj = force->special_lj;
|
||||
double* _noalias special_coul = force->special_coul;
|
||||
double* _noalias special_lj = force->special_lj;
|
||||
double qqrd2e = force->qqrd2e;
|
||||
|
||||
inum = list->inum;
|
||||
int* __restrict__ ilist = list->ilist;
|
||||
int** __restrict__ firstneigh = list->firstneigh;
|
||||
int* __restrict__ numneigh = list->numneigh;
|
||||
int* _noalias ilist = list->ilist;
|
||||
int** _noalias firstneigh = list->firstneigh;
|
||||
int* _noalias numneigh = list->numneigh;
|
||||
|
||||
vec3_t* __restrict__ xx = (vec3_t*)x[0];
|
||||
vec3_t* __restrict__ ff = (vec3_t*)f[0];
|
||||
vec3_t* _noalias xx = (vec3_t*)x[0];
|
||||
vec3_t* _noalias ff = (vec3_t*)f[0];
|
||||
|
||||
int ntypes = atom->ntypes;
|
||||
int ntypes2 = ntypes*ntypes;
|
||||
|
@ -107,7 +107,7 @@ void PairLJCharmmCoulLongOpt::eval()
|
|||
double tmp_coef1 = 1.0/denom_lj;
|
||||
double tmp_coef2 = cut_ljsq - 3.0*cut_lj_innersq;
|
||||
|
||||
fast_alpha_t* __restrict__ fast_alpha =
|
||||
fast_alpha_t* _noalias fast_alpha =
|
||||
(fast_alpha_t*)malloc(ntypes2*sizeof(fast_alpha_t));
|
||||
for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) {
|
||||
fast_alpha_t& a = fast_alpha[i*ntypes+j];
|
||||
|
@ -117,7 +117,7 @@ void PairLJCharmmCoulLongOpt::eval()
|
|||
a.lj3 = lj3[i+1][j+1];
|
||||
a.lj4 = lj4[i+1][j+1];
|
||||
}
|
||||
fast_alpha_t* __restrict__ tabsix = fast_alpha;
|
||||
fast_alpha_t* _noalias tabsix = fast_alpha;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
|
@ -128,14 +128,14 @@ void PairLJCharmmCoulLongOpt::eval()
|
|||
double ytmp = xx[i].y;
|
||||
double ztmp = xx[i].z;
|
||||
itype = type[i] - 1;
|
||||
int* __restrict__ jlist = firstneigh[i];
|
||||
int* _noalias jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
double tmpfx = 0.0;
|
||||
double tmpfy = 0.0;
|
||||
double tmpfz = 0.0;
|
||||
|
||||
fast_alpha_t* __restrict__ tabsixi = (fast_alpha_t*) &tabsix[itype*ntypes];
|
||||
fast_alpha_t* _noalias tabsixi = (fast_alpha_t*) &tabsix[itype*ntypes];
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
|
|
|
@ -67,24 +67,24 @@ void PairLJCutOpt::eval()
|
|||
double factor_lj;
|
||||
double evdwl = 0.0;
|
||||
|
||||
double** __restrict__ x = atom->x;
|
||||
double** __restrict__ f = atom->f;
|
||||
int* __restrict__ type = atom->type;
|
||||
double** _noalias x = atom->x;
|
||||
double** _noalias f = atom->f;
|
||||
int* _noalias type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
double* __restrict__ special_lj = force->special_lj;
|
||||
double* _noalias special_lj = force->special_lj;
|
||||
|
||||
inum = list->inum;
|
||||
int* __restrict__ ilist = list->ilist;
|
||||
int** __restrict__ firstneigh = list->firstneigh;
|
||||
int* __restrict__ numneigh = list->numneigh;
|
||||
int* _noalias ilist = list->ilist;
|
||||
int** _noalias firstneigh = list->firstneigh;
|
||||
int* _noalias numneigh = list->numneigh;
|
||||
|
||||
vec3_t* __restrict__ xx = (vec3_t*)x[0];
|
||||
vec3_t* __restrict__ ff = (vec3_t*)f[0];
|
||||
vec3_t* _noalias xx = (vec3_t*)x[0];
|
||||
vec3_t* _noalias ff = (vec3_t*)f[0];
|
||||
|
||||
int ntypes = atom->ntypes;
|
||||
int ntypes2 = ntypes*ntypes;
|
||||
|
||||
fast_alpha_t* __restrict__ fast_alpha =
|
||||
fast_alpha_t* _noalias fast_alpha =
|
||||
(fast_alpha_t*) malloc(ntypes2*sizeof(fast_alpha_t));
|
||||
for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) {
|
||||
fast_alpha_t& a = fast_alpha[i*ntypes+j];
|
||||
|
@ -95,7 +95,7 @@ void PairLJCutOpt::eval()
|
|||
a.lj4 = lj4[i+1][j+1];
|
||||
a.offset = offset[i+1][j+1];
|
||||
}
|
||||
fast_alpha_t* __restrict__ tabsix = fast_alpha;
|
||||
fast_alpha_t* _noalias tabsix = fast_alpha;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
|
@ -105,14 +105,14 @@ void PairLJCutOpt::eval()
|
|||
double ytmp = xx[i].y;
|
||||
double ztmp = xx[i].z;
|
||||
itype = type[i] - 1;
|
||||
int* __restrict__ jlist = firstneigh[i];
|
||||
int* _noalias jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
double tmpfx = 0.0;
|
||||
double tmpfy = 0.0;
|
||||
double tmpfz = 0.0;
|
||||
|
||||
fast_alpha_t* __restrict__ tabsixi = (fast_alpha_t*)&tabsix[itype*ntypes];
|
||||
fast_alpha_t* _noalias tabsixi = (fast_alpha_t*)&tabsix[itype*ntypes];
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
|
|
|
@ -68,24 +68,24 @@ void PairMorseOpt::eval()
|
|||
double factor_lj;
|
||||
double evdwl = 0.0;
|
||||
|
||||
double** __restrict__ x = atom->x;
|
||||
double** __restrict__ f = atom->f;
|
||||
int* __restrict__ type = atom->type;
|
||||
double** _noalias x = atom->x;
|
||||
double** _noalias f = atom->f;
|
||||
int* _noalias type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
double* __restrict__ special_lj = force->special_lj;
|
||||
double* _noalias special_lj = force->special_lj;
|
||||
|
||||
inum = list->inum;
|
||||
int* __restrict__ ilist = list->ilist;
|
||||
int** __restrict__ firstneigh = list->firstneigh;
|
||||
int* __restrict__ numneigh = list->numneigh;
|
||||
int* _noalias ilist = list->ilist;
|
||||
int** _noalias firstneigh = list->firstneigh;
|
||||
int* _noalias numneigh = list->numneigh;
|
||||
|
||||
vec3_t* __restrict__ xx = (vec3_t*)x[0];
|
||||
vec3_t* __restrict__ ff = (vec3_t*)f[0];
|
||||
vec3_t* _noalias xx = (vec3_t*)x[0];
|
||||
vec3_t* _noalias ff = (vec3_t*)f[0];
|
||||
|
||||
int ntypes = atom->ntypes;
|
||||
int ntypes2 = ntypes*ntypes;
|
||||
|
||||
fast_alpha_t* __restrict__ fast_alpha =
|
||||
fast_alpha_t* _noalias fast_alpha =
|
||||
(fast_alpha_t*) malloc(ntypes2*sizeof(fast_alpha_t));
|
||||
for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) {
|
||||
fast_alpha_t& a = fast_alpha[i*ntypes+j];
|
||||
|
@ -96,7 +96,7 @@ void PairMorseOpt::eval()
|
|||
a.d0 = d0[i+1][j+1];
|
||||
a.offset = offset[i+1][j+1];
|
||||
}
|
||||
fast_alpha_t* __restrict__ tabsix = fast_alpha;
|
||||
fast_alpha_t* _noalias tabsix = fast_alpha;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
|
@ -106,14 +106,14 @@ void PairMorseOpt::eval()
|
|||
double ytmp = xx[i].y;
|
||||
double ztmp = xx[i].z;
|
||||
itype = type[i] - 1;
|
||||
int* __restrict__ jlist = firstneigh[i];
|
||||
int* _noalias jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
double tmpfx = 0.0;
|
||||
double tmpfy = 0.0;
|
||||
double tmpfz = 0.0;
|
||||
|
||||
fast_alpha_t* __restrict__ tabsixi = (fast_alpha_t*)&tabsix[itype*ntypes];
|
||||
fast_alpha_t* _noalias tabsixi = (fast_alpha_t*)&tabsix[itype*ntypes];
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
|
|
|
@ -38,8 +38,6 @@ Intel compilers.
|
|||
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
The files in this package must be compiled with the Intel C++
|
||||
compiler, i.e. icc/icpc.
|
||||
|
||||
|
||||
|
||||
For portability reasons, vectorization directives are currently only enabled
|
||||
for Intel compilers. Using other compilers may result in significantly
|
||||
lower performance.
|
||||
|
|
|
@ -128,7 +128,7 @@ class FixIntel : public Fix {
|
|||
|
||||
protected:
|
||||
int _overflow_flag[5];
|
||||
__declspec(align(64)) int _off_overflow_flag[5];
|
||||
_alignvar(int _off_overflow_flag[5],64);
|
||||
int _allow_separate_buffers, _offload_ghost;
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
double _balance_pair_time, _balance_other_time;
|
||||
|
@ -155,18 +155,18 @@ class FixIntel : public Fix {
|
|||
double _offload_balance, _balance_neighbor, _balance_pair, _balance_fixed;
|
||||
double _timers[NUM_ITIMERS];
|
||||
double _stopwatch[NUM_ITIMERS];
|
||||
__declspec(align(64)) double _stopwatch_offload_neighbor[1];
|
||||
__declspec(align(64)) double _stopwatch_offload_pair[1];
|
||||
_alignvar(double _stopwatch_offload_neighbor[1],64);
|
||||
_alignvar(double _stopwatch_offload_pair[1],64);
|
||||
|
||||
template <class ft, class acc_t>
|
||||
inline void add_results(const ft * restrict const f_in,
|
||||
const acc_t * restrict const ev_global,
|
||||
inline void add_results(const ft * _noalias const f_in,
|
||||
const acc_t * _noalias const ev_global,
|
||||
const int eatom, const int vatom,
|
||||
const int offload);
|
||||
|
||||
template <class ft, class acc_t>
|
||||
inline void add_oresults(const ft * restrict const f_in,
|
||||
const acc_t * restrict const ev_global,
|
||||
inline void add_oresults(const ft * _noalias const f_in,
|
||||
const acc_t * _noalias const ev_global,
|
||||
const int eatom, const int vatom,
|
||||
const int out_offset, const int nall);
|
||||
|
||||
|
@ -176,8 +176,8 @@ class FixIntel : public Fix {
|
|||
int _im_real_space_task;
|
||||
MPI_Comm _real_space_comm;
|
||||
template <class ft, class acc_t>
|
||||
inline void add_off_results(const ft * restrict const f_in,
|
||||
const acc_t * restrict const ev_global);
|
||||
inline void add_off_results(const ft * _noalias const f_in,
|
||||
const acc_t * _noalias const ev_global);
|
||||
#endif
|
||||
};
|
||||
|
||||
|
@ -284,8 +284,8 @@ void FixIntel::add_result_array(IntelBuffers<float,float>::vec3_acc_t *f_in,
|
|||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class ft, class acc_t>
|
||||
void FixIntel::add_results(const ft * restrict const f_in,
|
||||
const acc_t * restrict const ev_global,
|
||||
void FixIntel::add_results(const ft * _noalias const f_in,
|
||||
const acc_t * _noalias const ev_global,
|
||||
const int eatom, const int vatom,
|
||||
const int offload) {
|
||||
start_watch(TIME_PACK);
|
||||
|
@ -295,7 +295,7 @@ void FixIntel::add_results(const ft * restrict const f_in,
|
|||
if (offload) {
|
||||
add_oresults(f_in, ev_global, eatom, vatom, 0, _offload_nlocal);
|
||||
if (force->newton_pair) {
|
||||
const acc_t * restrict const enull = 0;
|
||||
const acc_t * _noalias const enull = 0;
|
||||
int offset = _offload_nlocal;
|
||||
if (atom->torque) offset *= 2;
|
||||
add_oresults(f_in + offset, enull, eatom, vatom,
|
||||
|
@ -305,7 +305,7 @@ void FixIntel::add_results(const ft * restrict const f_in,
|
|||
add_oresults(f_in, ev_global, eatom, vatom,
|
||||
_host_min_local, _host_used_local);
|
||||
if (force->newton_pair) {
|
||||
const acc_t * restrict const enull = 0;
|
||||
const acc_t * _noalias const enull = 0;
|
||||
int offset = _host_used_local;
|
||||
if (atom->torque) offset *= 2;
|
||||
add_oresults(f_in + offset, enull, eatom,
|
||||
|
@ -333,11 +333,11 @@ void FixIntel::add_results(const ft * restrict const f_in,
|
|||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class ft, class acc_t>
|
||||
void FixIntel::add_oresults(const ft * restrict const f_in,
|
||||
const acc_t * restrict const ev_global,
|
||||
void FixIntel::add_oresults(const ft * _noalias const f_in,
|
||||
const acc_t * _noalias const ev_global,
|
||||
const int eatom, const int vatom,
|
||||
const int out_offset, const int nall) {
|
||||
lmp_ft * restrict const f = (lmp_ft *) lmp->atom->f[0] + out_offset;
|
||||
lmp_ft * _noalias const f = (lmp_ft *) lmp->atom->f[0] + out_offset;
|
||||
if (atom->torque) {
|
||||
if (f_in[1].w)
|
||||
if (f_in[1].w == 1)
|
||||
|
@ -351,12 +351,16 @@ void FixIntel::add_oresults(const ft * restrict const f_in,
|
|||
#pragma omp parallel default(none)
|
||||
#endif
|
||||
{
|
||||
#if defined(_OPENMP)
|
||||
const int tid = omp_get_thread_num();
|
||||
#else
|
||||
const int tid = 0;
|
||||
#endif
|
||||
int ifrom, ito;
|
||||
IP_PRE_omp_range_align(ifrom, ito, tid, nall, _nthreads, sizeof(acc_t));
|
||||
if (atom->torque) {
|
||||
int ii = ifrom * 2;
|
||||
lmp_ft * restrict const tor = (lmp_ft *) lmp->atom->torque[0] +
|
||||
lmp_ft * _noalias const tor = (lmp_ft *) lmp->atom->torque[0] +
|
||||
out_offset;
|
||||
if (eatom) {
|
||||
for (int i = ifrom; i < ito; i++) {
|
||||
|
@ -440,6 +444,7 @@ void FixIntel::balance_stamp() {
|
|||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixIntel::acc_timers() {
|
||||
_timers[TIME_OFFLOAD_PAIR] += *_stopwatch_offload_pair;
|
||||
if (neighbor->ago == 0) {
|
||||
_timers[TIME_OFFLOAD_NEIGHBOR] += *_stopwatch_offload_neighbor;
|
||||
if (_setup_time_cleared == false) {
|
||||
|
@ -447,7 +452,6 @@ void FixIntel::acc_timers() {
|
|||
_setup_time_cleared = true;
|
||||
}
|
||||
}
|
||||
_timers[TIME_OFFLOAD_PAIR] += *_stopwatch_offload_pair;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -464,8 +468,8 @@ void FixIntel::set_neighbor_host_sizes() {
|
|||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class ft, class acc_t>
|
||||
void FixIntel::add_off_results(const ft * restrict const f_in,
|
||||
const acc_t * restrict const ev_global) {
|
||||
void FixIntel::add_off_results(const ft * _noalias const f_in,
|
||||
const acc_t * _noalias const ev_global) {
|
||||
if (_offload_balance < 0.0)
|
||||
_balance_other_time = MPI_Wtime() - _balance_other_time;
|
||||
|
||||
|
|
|
@ -22,8 +22,8 @@ using namespace LAMMPS_NS;
|
|||
|
||||
template <class flt_t, class acc_t>
|
||||
IntelBuffers<flt_t, acc_t>::IntelBuffers(class LAMMPS *lmp_in) :
|
||||
lmp(lmp_in), _x(0), _q(0), _quat(0), _f(0), _buf_size(0),
|
||||
_buf_local_size(0), _off_threads(0) {
|
||||
lmp(lmp_in), _x(0), _q(0), _quat(0), _f(0), _off_threads(0),
|
||||
_buf_size(0), _buf_local_size(0) {
|
||||
_list_alloc_atoms = 0;
|
||||
_ntypes = 0;
|
||||
_off_map_maxlocal = 0;
|
||||
|
@ -423,6 +423,8 @@ double IntelBuffers<flt_t, acc_t>::memory_usage(const int nthreads)
|
|||
tmem += _off_map_maxlocal * sizeof(int);
|
||||
tmem += (_list_alloc_atoms + _off_threads) * get_max_nbors() * sizeof(int);
|
||||
tmem += _ntypes * _ntypes * sizeof(int);
|
||||
|
||||
return tmem;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
|
|
@ -235,7 +235,8 @@ class IntelBuffers {
|
|||
|
||||
double memory_usage(const int nthreads);
|
||||
|
||||
int _special_holder, _nspecial_holder;
|
||||
tagint _special_holder;
|
||||
int _nspecial_holder;
|
||||
|
||||
protected:
|
||||
LAMMPS *lmp;
|
||||
|
@ -266,8 +267,8 @@ class IntelBuffers {
|
|||
#endif
|
||||
|
||||
int _buf_size, _buf_local_size;
|
||||
__declspec(align(64)) acc_t _ev_global[8];
|
||||
__declspec(align(64)) acc_t _ev_global_host[8];
|
||||
_alignvar(acc_t _ev_global[8],64);
|
||||
_alignvar(acc_t _ev_global_host[8],64);
|
||||
|
||||
void _grow(const int nall, const int nlocal, const int nthreads,
|
||||
const int offload_end);
|
||||
|
|
|
@ -26,6 +26,22 @@
|
|||
|
||||
#ifndef LAMMPS_MEMALIGN
|
||||
#error Please set -DLAMMPS_MEMALIGN=64 in CCFLAGS for your LAMMPS makefile.
|
||||
#else
|
||||
#if (LAMMPS_MEMALIGN != 64)
|
||||
#error Please set -DLAMMPS_MEMALIGN=64 in CCFLAGS for your LAMMPS makefile.
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#define _use_omp_pragma(txt) _Pragma(txt)
|
||||
#else
|
||||
#define _use_omp_pragma(txt)
|
||||
#endif
|
||||
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#define _use_simd_pragma(txt) _Pragma(txt)
|
||||
#else
|
||||
#define _use_simd_pragma(txt)
|
||||
#endif
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
@ -141,7 +157,7 @@ inline double MIC_Wtime() {
|
|||
if (fix->separate_buffers() && ago != 0) { \
|
||||
fix->start_watch(TIME_PACK); \
|
||||
if (offload) { \
|
||||
_Pragma("omp parallel default(none) shared(buffers,nlocal,nall)") \
|
||||
_use_omp_pragma("omp parallel default(none) shared(buffers,nlocal,nall)") \
|
||||
{ \
|
||||
int ifrom, ito, tid; \
|
||||
int nthreads = comm->nthreads; \
|
||||
|
@ -343,15 +359,16 @@ inline double MIC_Wtime() {
|
|||
else \
|
||||
o_range = nlocal; \
|
||||
if (offload == 0) o_range -= minlocal; \
|
||||
IP_PRE_omp_range_align(iifrom, iito, tid, o_range, nthreads, \
|
||||
IP_PRE_omp_range_align(iifrom, iito, tid, o_range, nthreads, \
|
||||
sizeof(acc_t)); \
|
||||
\
|
||||
int t_off = f_stride; \
|
||||
if (eflag && eatom) { \
|
||||
for (int t = 1; t < nthreads; t++) { \
|
||||
_Pragma("vector nontemporal") \
|
||||
_use_simd_pragma("vector nontemporal") \
|
||||
_use_simd_pragma("novector") \
|
||||
for (int n = iifrom; n < iito; n++) { \
|
||||
f_start[n].x += f_start[n + t_off].x; \
|
||||
f_start[n].x += f_start[n + t_off].x; \
|
||||
f_start[n].y += f_start[n + t_off].y; \
|
||||
f_start[n].z += f_start[n + t_off].z; \
|
||||
f_start[n].w += f_start[n + t_off].w; \
|
||||
|
@ -360,8 +377,9 @@ inline double MIC_Wtime() {
|
|||
} \
|
||||
} else { \
|
||||
for (int t = 1; t < nthreads; t++) { \
|
||||
_Pragma("vector nontemporal") \
|
||||
for (int n = iifrom; n < iito; n++) { \
|
||||
_use_simd_pragma("vector nontemporal") \
|
||||
_use_simd_pragma("novector") \
|
||||
for (int n = iifrom; n < iito; n++) { \
|
||||
f_start[n].x += f_start[n + t_off].x; \
|
||||
f_start[n].y += f_start[n + t_off].y; \
|
||||
f_start[n].z += f_start[n + t_off].z; \
|
||||
|
@ -372,8 +390,9 @@ inline double MIC_Wtime() {
|
|||
\
|
||||
if (evflag) { \
|
||||
if (vflag == 2) { \
|
||||
const ATOM_T * restrict const xo = x + minlocal; \
|
||||
_Pragma("vector nontemporal") \
|
||||
const ATOM_T * _noalias const xo = x + minlocal; \
|
||||
_use_simd_pragma("vector nontemporal") \
|
||||
_use_simd_pragma("novector") \
|
||||
for (int n = iifrom; n < iito; n++) { \
|
||||
ov0 += f_start[n].x * xo[n].x; \
|
||||
ov1 += f_start[n].y * xo[n].y; \
|
||||
|
|
|
@ -287,7 +287,7 @@
|
|||
if (fabs(aug_8) > fabs(aug_0)) { \
|
||||
flt_t swapt; \
|
||||
swapt = aug_0; aug_0 = aug_8; aug_8 = swapt; \
|
||||
swapt = aug_1; aug_1 = aug_9; aug_9 = swapt; \
|
||||
swapt = aug_1; aug_1 = aug_9; aug_9 = swapt; \
|
||||
swapt = aug_2; aug_2 = aug_10; aug_10 = swapt; \
|
||||
swapt = aug_3; aug_3 = aug_11; aug_11 = swapt; \
|
||||
} \
|
||||
|
|
|
@ -79,7 +79,7 @@ inline int mcoord2bin(const flt_t x0, const flt_t x1, const flt_t x2,
|
|||
const int n1 = nspecial[i * 3]; \
|
||||
const int n2 = nspecial[i * 3 + 1]; \
|
||||
const int n3 = nspecial[i * 3 + 2]; \
|
||||
const int *sptr = special + i * maxspecial; \
|
||||
const tagint *sptr = special + i * maxspecial; \
|
||||
for (int s = 0; s < n3; s++) { \
|
||||
if (sptr[s] == tag) { \
|
||||
if (s < n1) { \
|
||||
|
@ -105,7 +105,7 @@ inline int mcoord2bin(const flt_t x0, const flt_t x1, const flt_t x2,
|
|||
|
||||
template <class flt_t, class acc_t>
|
||||
void Neighbor::bin_atoms(void * xin) {
|
||||
const ATOM_T * restrict const x = (const ATOM_T * restrict const)xin;
|
||||
const ATOM_T * _noalias const x = (const ATOM_T * _noalias const)xin;
|
||||
int nlocal = atom->nlocal;
|
||||
const int nall = nlocal + atom->nghost;
|
||||
|
||||
|
@ -243,11 +243,12 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
|
|||
return;
|
||||
}
|
||||
|
||||
const ATOM_T * restrict const x = buffers->get_x();
|
||||
int * restrict const firstneigh = buffers->firstneigh(list);
|
||||
const ATOM_T * _noalias const x = buffers->get_x();
|
||||
int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
|
||||
const int molecular = atom->molecular;
|
||||
int *ns = NULL, *s = NULL;
|
||||
int *ns = NULL;
|
||||
tagint *s = NULL;
|
||||
int tag_size, special_size;
|
||||
if (molecular) {
|
||||
s = atom->special[0];
|
||||
|
@ -260,23 +261,23 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
|
|||
tag_size = 0;
|
||||
special_size = 0;
|
||||
}
|
||||
const int * restrict const special = s;
|
||||
const int * restrict const nspecial = ns;
|
||||
const tagint * _noalias const special = s;
|
||||
const int * _noalias const nspecial = ns;
|
||||
const int maxspecial = atom->maxspecial;
|
||||
const int * restrict const tag = atom->tag;
|
||||
const tagint * _noalias const tag = atom->tag;
|
||||
|
||||
int * restrict const ilist = list->ilist;
|
||||
int * restrict numneigh = list->numneigh;
|
||||
int * restrict const cnumneigh = buffers->cnumneigh(list);
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias numneigh = list->numneigh;
|
||||
int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int nstencil = list->nstencil;
|
||||
const int * restrict const stencil = list->stencil;
|
||||
const flt_t * restrict const cutneighsq = buffers->get_cutneighsq()[0];
|
||||
const int * _noalias const stencil = list->stencil;
|
||||
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
|
||||
const int ntypes = atom->ntypes + 1;
|
||||
const int nlocal = atom->nlocal;
|
||||
|
||||
#ifndef _LMP_INTEL_OFFLOAD
|
||||
int * const mask = atom->mask;
|
||||
int * const molecule = atom->molecule;
|
||||
tagint * const molecule = atom->molecule;
|
||||
#endif
|
||||
|
||||
int tnum;
|
||||
|
@ -316,8 +317,8 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
|
|||
}
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
const int * restrict const binhead = this->binhead;
|
||||
const int * restrict const special_flag = this->special_flag;
|
||||
const int * _noalias const binhead = this->binhead;
|
||||
const int * _noalias const special_flag = this->special_flag;
|
||||
const int nbinx = this->nbinx;
|
||||
const int nbiny = this->nbiny;
|
||||
const int nbinz = this->nbinz;
|
||||
|
@ -327,7 +328,7 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
|
|||
const int mbinx = this->mbinx;
|
||||
const int mbiny = this->mbiny;
|
||||
const int mbinz = this->mbinz;
|
||||
const int * restrict const bins = this->bins;
|
||||
const int * _noalias const bins = this->bins;
|
||||
const int cop = fix->coprocessor_number();
|
||||
const int separate_buffers = fix->separate_buffers();
|
||||
#pragma offload target(mic:cop) if(offload) \
|
||||
|
@ -486,7 +487,7 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
|
|||
|
||||
if (molecular) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * restrict jlist = firstneigh + cnumneigh[i];
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int j = jlist[jj];
|
||||
|
@ -507,7 +508,7 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
|
|||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
else if (separate_buffers) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * restrict jlist = firstneigh + cnumneigh[i];
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
int jj = 0;
|
||||
for (jj = 0; jj < jnum; jj++)
|
||||
|
@ -662,14 +663,15 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
|
|||
return;
|
||||
}
|
||||
|
||||
const ATOM_T * restrict const x = buffers->get_x();
|
||||
int * restrict const firstneigh = buffers->firstneigh(list);
|
||||
const ATOM_T * _noalias const x = buffers->get_x();
|
||||
int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
int nall_t = nall;
|
||||
if (offload_noghost && offload) nall_t = atom->nlocal;
|
||||
const int e_nall = nall_t;
|
||||
|
||||
const int molecular = atom->molecular;
|
||||
int *ns = NULL, *s = NULL;
|
||||
int *ns = NULL;
|
||||
tagint *s = NULL;
|
||||
int tag_size, special_size;
|
||||
if (molecular) {
|
||||
s = atom->special[0];
|
||||
|
@ -682,23 +684,23 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
|
|||
tag_size = 0;
|
||||
special_size = 0;
|
||||
}
|
||||
const int * restrict const special = s;
|
||||
const int * restrict const nspecial = ns;
|
||||
const tagint * _noalias const special = s;
|
||||
const int * _noalias const nspecial = ns;
|
||||
const int maxspecial = atom->maxspecial;
|
||||
const int * restrict const tag = atom->tag;
|
||||
const tagint * _noalias const tag = atom->tag;
|
||||
|
||||
int * restrict const ilist = list->ilist;
|
||||
int * restrict numneigh = list->numneigh;
|
||||
int * restrict const cnumneigh = buffers->cnumneigh(list);
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias numneigh = list->numneigh;
|
||||
int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int nstencil = list->nstencil;
|
||||
const int * restrict const stencil = list->stencil;
|
||||
const flt_t * restrict const cutneighsq = buffers->get_cutneighsq()[0];
|
||||
const int * _noalias const stencil = list->stencil;
|
||||
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
|
||||
const int ntypes = atom->ntypes + 1;
|
||||
const int nlocal = atom->nlocal;
|
||||
|
||||
#ifndef _LMP_INTEL_OFFLOAD
|
||||
int * const mask = atom->mask;
|
||||
int * const molecule = atom->molecule;
|
||||
tagint * const molecule = atom->molecule;
|
||||
#endif
|
||||
|
||||
int tnum;
|
||||
|
@ -737,8 +739,8 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
|
|||
}
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
const int * restrict const binhead = this->binhead;
|
||||
const int * restrict const special_flag = this->special_flag;
|
||||
const int * _noalias const binhead = this->binhead;
|
||||
const int * _noalias const special_flag = this->special_flag;
|
||||
const int nbinx = this->nbinx;
|
||||
const int nbiny = this->nbiny;
|
||||
const int nbinz = this->nbinz;
|
||||
|
@ -748,7 +750,7 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
|
|||
const int mbinx = this->mbinx;
|
||||
const int mbiny = this->mbiny;
|
||||
const int mbinz = this->mbinz;
|
||||
const int * restrict const bins = this->bins;
|
||||
const int * _noalias const bins = this->bins;
|
||||
const int cop = fix->coprocessor_number();
|
||||
const int separate_buffers = fix->separate_buffers();
|
||||
#pragma offload target(mic:cop) if(offload) \
|
||||
|
@ -948,7 +950,7 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
|
|||
|
||||
if (molecular) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * restrict jlist = firstneigh + cnumneigh[i];
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int j = jlist[jj];
|
||||
|
@ -970,7 +972,7 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
|
|||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
else if (separate_buffers) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * restrict jlist = firstneigh + cnumneigh[i];
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
int jj = 0;
|
||||
for (jj = 0; jj < jnum; jj++)
|
||||
|
@ -1127,14 +1129,15 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
|
|||
return;
|
||||
}
|
||||
|
||||
const ATOM_T * restrict const x = buffers->get_x();
|
||||
int * restrict const firstneigh = buffers->firstneigh(list);
|
||||
const ATOM_T * _noalias const x = buffers->get_x();
|
||||
int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
int nall_t = nall;
|
||||
if (offload_noghost && offload) nall_t = atom->nlocal;
|
||||
const int e_nall = nall_t;
|
||||
|
||||
const int molecular = atom->molecular;
|
||||
int *ns = NULL, *s = NULL;
|
||||
int *ns = NULL;
|
||||
tagint *s = NULL;
|
||||
int tag_size, special_size;
|
||||
if (molecular) {
|
||||
s = atom->special[0];
|
||||
|
@ -1147,23 +1150,23 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
|
|||
tag_size = 0;
|
||||
special_size = 0;
|
||||
}
|
||||
const int * restrict const special = s;
|
||||
const int * restrict const nspecial = ns;
|
||||
const tagint * _noalias const special = s;
|
||||
const int * _noalias const nspecial = ns;
|
||||
const int maxspecial = atom->maxspecial;
|
||||
const int * restrict const tag = atom->tag;
|
||||
const tagint * _noalias const tag = atom->tag;
|
||||
|
||||
int * restrict const ilist = list->ilist;
|
||||
int * restrict numneigh = list->numneigh;
|
||||
int * restrict const cnumneigh = buffers->cnumneigh(list);
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias numneigh = list->numneigh;
|
||||
int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int nstencil = list->nstencil;
|
||||
const int * restrict const stencil = list->stencil;
|
||||
const flt_t * restrict const cutneighsq = buffers->get_cutneighsq()[0];
|
||||
const int * _noalias const stencil = list->stencil;
|
||||
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
|
||||
const int ntypes = atom->ntypes + 1;
|
||||
const int nlocal = atom->nlocal;
|
||||
|
||||
#ifndef _LMP_INTEL_OFFLOAD
|
||||
int * const mask = atom->mask;
|
||||
int * const molecule = atom->molecule;
|
||||
tagint * const molecule = atom->molecule;
|
||||
#endif
|
||||
|
||||
int tnum;
|
||||
|
@ -1202,8 +1205,8 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
|
|||
}
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
const int * restrict const binhead = this->binhead;
|
||||
const int * restrict const special_flag = this->special_flag;
|
||||
const int * _noalias const binhead = this->binhead;
|
||||
const int * _noalias const special_flag = this->special_flag;
|
||||
const int nbinx = this->nbinx;
|
||||
const int nbiny = this->nbiny;
|
||||
const int nbinz = this->nbinz;
|
||||
|
@ -1213,7 +1216,7 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
|
|||
const int mbinx = this->mbinx;
|
||||
const int mbiny = this->mbiny;
|
||||
const int mbinz = this->mbinz;
|
||||
const int * restrict const bins = this->bins;
|
||||
const int * _noalias const bins = this->bins;
|
||||
const int cop = fix->coprocessor_number();
|
||||
const int separate_buffers = fix->separate_buffers();
|
||||
#pragma offload target(mic:cop) if(offload) \
|
||||
|
@ -1386,7 +1389,7 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
|
|||
|
||||
if (molecular) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * restrict jlist = firstneigh + cnumneigh[i];
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int j = jlist[jj];
|
||||
|
@ -1407,7 +1410,7 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
|
|||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
else if (separate_buffers) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * restrict jlist = firstneigh + cnumneigh[i];
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
int jj = 0;
|
||||
for (jj = 0; jj < jnum; jj++)
|
||||
|
|
|
@ -79,7 +79,7 @@ void PairGayBerneIntel::compute(int eflag, int vflag,
|
|||
fix->start_watch(TIME_PACK);
|
||||
const AtomVecEllipsoid::Bonus * const bonus = avec->bonus;
|
||||
const int * const ellipsoid = atom->ellipsoid;
|
||||
QUAT_T * restrict const quat = buffers->get_quat();
|
||||
QUAT_T * _noalias const quat = buffers->get_quat();
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
|
||||
#endif
|
||||
|
@ -150,8 +150,8 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
|||
fix->get_buffern(offload, nlocal, nall, minlocal);
|
||||
|
||||
const int ago = neighbor->ago;
|
||||
ATOM_T * restrict const x = buffers->get_x(offload);
|
||||
QUAT_T * restrict const quat = buffers->get_quat(offload);
|
||||
ATOM_T * _noalias const x = buffers->get_x(offload);
|
||||
QUAT_T * _noalias const quat = buffers->get_quat(offload);
|
||||
const AtomVecEllipsoid::Bonus *bonus = avec->bonus;
|
||||
const int *ellipsoid = atom->ellipsoid;
|
||||
|
||||
|
@ -225,15 +225,15 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
|||
}
|
||||
#endif
|
||||
|
||||
// const int * restrict const ilist = list->ilist;
|
||||
const int * restrict const numneigh = list->numneigh;
|
||||
const int * restrict const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * restrict const firstneigh = buffers->firstneigh(list);
|
||||
const flt_t * restrict const special_lj = fc.special_lj;
|
||||
// const int * _noalias const ilist = list->ilist;
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const flt_t * _noalias const special_lj = fc.special_lj;
|
||||
|
||||
const FC_PACKED1_T * restrict const ijc = fc.ijc[0];
|
||||
const FC_PACKED2_T * restrict const lj34 = fc.lj34[0];
|
||||
const FC_PACKED3_T * restrict const ic = fc.ic;
|
||||
const FC_PACKED1_T * _noalias const ijc = fc.ijc[0];
|
||||
const FC_PACKED2_T * _noalias const lj34 = fc.lj34[0];
|
||||
const FC_PACKED3_T * _noalias const ic = fc.ic;
|
||||
const flt_t mu = fc.mu;
|
||||
const flt_t gamma = fc.gamma;
|
||||
const flt_t upsilon = fc.upsilon;
|
||||
|
@ -255,8 +255,8 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
|||
x_size, q_size, ev_size, f_stride);
|
||||
|
||||
int tc;
|
||||
FORCE_T * restrict f_start;
|
||||
acc_t * restrict ev_global;
|
||||
FORCE_T * _noalias f_start;
|
||||
acc_t * _noalias ev_global;
|
||||
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
|
||||
const int max_nbors = _max_nbors;
|
||||
const int nthreads = tc;
|
||||
|
@ -351,25 +351,25 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
|||
iifrom += astart;
|
||||
iito += astart;
|
||||
|
||||
FORCE_T * restrict const f = f_start - minlocal * 2 + (tid * f_stride);
|
||||
FORCE_T * _noalias const f = f_start - minlocal * 2 + (tid * f_stride);
|
||||
memset(f + minlocal * 2, 0, f_stride * sizeof(FORCE_T));
|
||||
|
||||
flt_t * restrict const rsq_form = rsq_formi + tid * max_nbors;
|
||||
flt_t * restrict const delx_form = delx_formi + tid * max_nbors;
|
||||
flt_t * restrict const dely_form = dely_formi + tid * max_nbors;
|
||||
flt_t * restrict const delz_form = delz_formi + tid * max_nbors;
|
||||
int * restrict const jtype_form = jtype_formi + tid * max_nbors;
|
||||
int * restrict const jlist_form = jlist_formi + tid * max_nbors;
|
||||
flt_t * _noalias const rsq_form = rsq_formi + tid * max_nbors;
|
||||
flt_t * _noalias const delx_form = delx_formi + tid * max_nbors;
|
||||
flt_t * _noalias const dely_form = dely_formi + tid * max_nbors;
|
||||
flt_t * _noalias const delz_form = delz_formi + tid * max_nbors;
|
||||
int * _noalias const jtype_form = jtype_formi + tid * max_nbors;
|
||||
int * _noalias const jlist_form = jlist_formi + tid * max_nbors;
|
||||
|
||||
int ierror = 0;
|
||||
for (int i = iifrom; i < iito; ++i) {
|
||||
// const int i = ilist[ii];
|
||||
const int itype = x[i].w;
|
||||
const int ptr_off = itype * ntypes;
|
||||
const FC_PACKED1_T * restrict const ijci = ijc + ptr_off;
|
||||
const FC_PACKED2_T * restrict const lj34i = lj34 + ptr_off;
|
||||
const FC_PACKED1_T * _noalias const ijci = ijc + ptr_off;
|
||||
const FC_PACKED2_T * _noalias const lj34i = lj34 + ptr_off;
|
||||
|
||||
const int * restrict const jlist = firstneigh + cnumneigh[i];
|
||||
const int * _noalias const jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
|
||||
const flt_t xtmp = x[i].x;
|
||||
|
@ -433,9 +433,11 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
|||
__assume(packed_j % 8 == 0);
|
||||
__assume(packed_j % INTEL_MIC_VECTOR_WIDTH == 0);
|
||||
#endif
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma simd reduction(+:fxtmp,fytmp,fztmp,fwtmp,t1tmp,t2tmp,t3tmp, \
|
||||
sevdwl,sv0,sv1,sv2,sv3,sv4,sv5)
|
||||
#endif
|
||||
for (int jj = 0; jj < packed_j; jj++) {
|
||||
flt_t a2_0, a2_1, a2_2, a2_3, a2_4, a2_5, a2_6, a2_7, a2_8;
|
||||
flt_t b2_0, b2_1, b2_2, b2_3, b2_4, b2_5, b2_6, b2_7, b2_8;
|
||||
|
@ -796,7 +798,10 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
|||
int t_off = f_stride;
|
||||
if (EFLAG && eatom) {
|
||||
for (int t = 1; t < nthreads; t++) {
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#pragma vector nontemporal
|
||||
#pragma novector
|
||||
#endif
|
||||
for (int n = iifrom * 2; n < two_iito; n++) {
|
||||
f_start[n].x += f_start[n + t_off].x;
|
||||
f_start[n].y += f_start[n + t_off].y;
|
||||
|
@ -807,7 +812,10 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
|||
}
|
||||
} else {
|
||||
for (int t = 1; t < nthreads; t++) {
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#pragma vector nontemporal
|
||||
#pragma novector
|
||||
#endif
|
||||
for (int n = iifrom * 2; n < two_iito; n++) {
|
||||
f_start[n].x += f_start[n + t_off].x;
|
||||
f_start[n].y += f_start[n + t_off].y;
|
||||
|
@ -819,8 +827,11 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
|||
|
||||
if (EVFLAG) {
|
||||
if (vflag==2) {
|
||||
const ATOM_T * restrict const xo = x + minlocal;
|
||||
const ATOM_T * _noalias const xo = x + minlocal;
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#pragma vector nontemporal
|
||||
#pragma novector
|
||||
#endif
|
||||
for (int n = iifrom; n < iito; n++) {
|
||||
const int nt2 = n * 2;
|
||||
ov0 += f_start[nt2].x * xo[n].x;
|
||||
|
|
|
@ -62,7 +62,10 @@ class PairGayBerneIntel : public PairGayBerne {
|
|||
typedef struct { flt_t lj3, lj4; } fc_packed2;
|
||||
typedef struct { flt_t shape2[4], well[4]; } fc_packed3;
|
||||
|
||||
__declspec(align(64)) flt_t special_lj[4], gamma, upsilon, mu;
|
||||
_alignvar(flt_t special_lj[4],64);
|
||||
_alignvar(flt_t gamma,64);
|
||||
_alignvar(flt_t upsilon,64);
|
||||
_alignvar(flt_t mu,64);
|
||||
fc_packed1 **ijc;
|
||||
fc_packed2 **lj34;
|
||||
fc_packed3 *ic;
|
||||
|
|
|
@ -143,25 +143,25 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
|
|||
const int ago = neighbor->ago;
|
||||
IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
|
||||
|
||||
ATOM_T * restrict const x = buffers->get_x(offload);
|
||||
flt_t * restrict const q = buffers->get_q(offload);
|
||||
ATOM_T * _noalias const x = buffers->get_x(offload);
|
||||
flt_t * _noalias const q = buffers->get_q(offload);
|
||||
|
||||
const int * restrict const numneigh = list->numneigh;
|
||||
const int * restrict const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * restrict const firstneigh = buffers->firstneigh(list);
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
|
||||
const flt_t * restrict const special_coul = fc.special_coul;
|
||||
const flt_t * restrict const special_lj = fc.special_lj;
|
||||
const flt_t * _noalias const special_coul = fc.special_coul;
|
||||
const flt_t * _noalias const special_lj = fc.special_lj;
|
||||
const flt_t qqrd2e = force->qqrd2e;
|
||||
const flt_t inv_denom_lj = (flt_t)1.0/denom_lj;
|
||||
|
||||
const flt_t * restrict const cutsq = fc.cutsq[0];
|
||||
const LJ_T * restrict const lj = fc.lj[0];
|
||||
const TABLE_T * restrict const table = fc.table;
|
||||
const flt_t * restrict const etable = fc.etable;
|
||||
const flt_t * restrict const detable = fc.detable;
|
||||
const flt_t * restrict const ctable = fc.ctable;
|
||||
const flt_t * restrict const dctable = fc.dctable;
|
||||
const flt_t * _noalias const cutsq = fc.cutsq[0];
|
||||
const LJ_T * _noalias const lj = fc.lj[0];
|
||||
const TABLE_T * _noalias const table = fc.table;
|
||||
const flt_t * _noalias const etable = fc.etable;
|
||||
const flt_t * _noalias const detable = fc.detable;
|
||||
const flt_t * _noalias const ctable = fc.ctable;
|
||||
const flt_t * _noalias const dctable = fc.dctable;
|
||||
const flt_t cut_ljsq = fc.cut_ljsq;
|
||||
const flt_t cut_lj_innersq = fc.cut_lj_innersq;
|
||||
const flt_t cut_coulsq = fc.cut_coulsq;
|
||||
|
@ -178,8 +178,8 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
|
|||
x_size, q_size, ev_size, f_stride);
|
||||
|
||||
int tc;
|
||||
FORCE_T * restrict f_start;
|
||||
acc_t * restrict ev_global;
|
||||
FORCE_T * _noalias f_start;
|
||||
acc_t * _noalias ev_global;
|
||||
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
|
||||
|
||||
const int nthreads = tc;
|
||||
|
@ -242,7 +242,7 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
|
|||
iifrom += astart;
|
||||
iito += astart;
|
||||
|
||||
FORCE_T * restrict const f = f_start - minlocal + (tid * f_stride);
|
||||
FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride);
|
||||
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
|
||||
flt_t cutboth = cut_coulsq;
|
||||
|
||||
|
@ -251,10 +251,10 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
|
|||
const int itype = x[i].w;
|
||||
|
||||
const int ptr_off = itype * ntypes;
|
||||
const flt_t * restrict const cutsqi = cutsq + ptr_off;
|
||||
const LJ_T * restrict const lji = lj + ptr_off;
|
||||
const flt_t * _noalias const cutsqi = cutsq + ptr_off;
|
||||
const LJ_T * _noalias const lji = lj + ptr_off;
|
||||
|
||||
const int * restrict const jlist = firstneigh + cnumneigh[i];
|
||||
const int * _noalias const jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
|
||||
acc_t fxtmp,fytmp,fztmp,fwtmp;
|
||||
|
@ -270,9 +270,11 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
|
|||
if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
|
||||
}
|
||||
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
|
||||
sv0, sv1, sv2, sv3, sv4, sv5)
|
||||
#endif
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
flt_t forcecoul, forcelj, evdwl, ecoul;
|
||||
forcecoul = forcelj = evdwl = ecoul = (flt_t)0.0;
|
||||
|
|
|
@ -62,8 +62,8 @@ class PairLJCharmmCoulLongIntel : public PairLJCharmmCoulLong {
|
|||
class ForceConst {
|
||||
public:
|
||||
typedef struct { flt_t r, dr, f, df; } table_t;
|
||||
__declspec(align(64)) flt_t special_coul[4];
|
||||
__declspec(align(64)) flt_t special_lj[4];
|
||||
_alignvar(flt_t special_coul[4],64);
|
||||
_alignvar(flt_t special_lj[4],64);
|
||||
flt_t **cutsq, g_ewald, tabinnersq;
|
||||
flt_t cut_coulsq, cut_ljsq;
|
||||
flt_t cut_lj_innersq;
|
||||
|
|
|
@ -143,24 +143,24 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
|
|||
const int ago = neighbor->ago;
|
||||
IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
|
||||
|
||||
ATOM_T * restrict const x = buffers->get_x(offload);
|
||||
flt_t * restrict const q = buffers->get_q(offload);
|
||||
ATOM_T * _noalias const x = buffers->get_x(offload);
|
||||
flt_t * _noalias const q = buffers->get_q(offload);
|
||||
|
||||
const int * restrict const numneigh = list->numneigh;
|
||||
const int * restrict const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * restrict const firstneigh = buffers->firstneigh(list);
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
|
||||
const flt_t * restrict const special_coul = fc.special_coul;
|
||||
const flt_t * restrict const special_lj = fc.special_lj;
|
||||
const flt_t * _noalias const special_coul = fc.special_coul;
|
||||
const flt_t * _noalias const special_lj = fc.special_lj;
|
||||
const flt_t qqrd2e = force->qqrd2e;
|
||||
|
||||
const C_FORCE_T * restrict const c_force = fc.c_force[0];
|
||||
const C_ENERGY_T * restrict const c_energy = fc.c_energy[0];
|
||||
const TABLE_T * restrict const table = fc.table;
|
||||
const flt_t * restrict const etable = fc.etable;
|
||||
const flt_t * restrict const detable = fc.detable;
|
||||
const flt_t * restrict const ctable = fc.ctable;
|
||||
const flt_t * restrict const dctable = fc.dctable;
|
||||
const C_FORCE_T * _noalias const c_force = fc.c_force[0];
|
||||
const C_ENERGY_T * _noalias const c_energy = fc.c_energy[0];
|
||||
const TABLE_T * _noalias const table = fc.table;
|
||||
const flt_t * _noalias const etable = fc.etable;
|
||||
const flt_t * _noalias const detable = fc.detable;
|
||||
const flt_t * _noalias const ctable = fc.ctable;
|
||||
const flt_t * _noalias const dctable = fc.dctable;
|
||||
const flt_t g_ewald = fc.g_ewald;
|
||||
const flt_t tabinnersq = fc.tabinnersq;
|
||||
|
||||
|
@ -174,8 +174,8 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
|
|||
x_size, q_size, ev_size, f_stride);
|
||||
|
||||
int tc;
|
||||
FORCE_T * restrict f_start;
|
||||
acc_t * restrict ev_global;
|
||||
FORCE_T * _noalias f_start;
|
||||
acc_t * _noalias ev_global;
|
||||
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
|
||||
|
||||
const int nthreads = tc;
|
||||
|
@ -237,17 +237,17 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
|
|||
iifrom += astart;
|
||||
iito += astart;
|
||||
|
||||
FORCE_T * restrict const f = f_start - minlocal + (tid * f_stride);
|
||||
FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride);
|
||||
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
|
||||
|
||||
for (int i = iifrom; i < iito; ++i) {
|
||||
const int itype = x[i].w;
|
||||
|
||||
const int ptr_off = itype * ntypes;
|
||||
const C_FORCE_T * restrict const c_forcei = c_force + ptr_off;
|
||||
const C_ENERGY_T * restrict const c_energyi = c_energy + ptr_off;
|
||||
const C_FORCE_T * _noalias const c_forcei = c_force + ptr_off;
|
||||
const C_ENERGY_T * _noalias const c_energyi = c_energy + ptr_off;
|
||||
|
||||
const int * restrict const jlist = firstneigh + cnumneigh[i];
|
||||
const int * _noalias const jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
|
||||
acc_t fxtmp,fytmp,fztmp,fwtmp;
|
||||
|
@ -263,9 +263,11 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
|
|||
if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
|
||||
}
|
||||
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
|
||||
sv0, sv1, sv2, sv3, sv4, sv5)
|
||||
#endif
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
flt_t forcecoul, forcelj, evdwl, ecoul;
|
||||
forcecoul = forcelj = evdwl = ecoul = (flt_t)0.0;
|
||||
|
|
|
@ -64,8 +64,8 @@ class PairLJCutCoulLongIntel : public PairLJCutCoulLong {
|
|||
typedef struct { flt_t cutsq, cut_ljsq, lj1, lj2; } c_force_t;
|
||||
typedef struct { flt_t lj3, lj4, offset, pad; } c_energy_t;
|
||||
typedef struct { flt_t r, dr, f, df; } table_t;
|
||||
__declspec(align(64)) flt_t special_coul[4];
|
||||
__declspec(align(64)) flt_t special_lj[4];
|
||||
_alignvar(flt_t special_coul[4],64);
|
||||
_alignvar(flt_t special_lj[4],64);
|
||||
flt_t g_ewald, tabinnersq;
|
||||
c_force_t **c_force;
|
||||
c_energy_t **c_energy;
|
||||
|
|
|
@ -134,14 +134,14 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
|
|||
const int ago = neighbor->ago;
|
||||
IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
|
||||
|
||||
ATOM_T * restrict const x = buffers->get_x(offload);
|
||||
ATOM_T * _noalias const x = buffers->get_x(offload);
|
||||
|
||||
const int * restrict const numneigh = list->numneigh;
|
||||
const int * restrict const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * restrict const firstneigh = buffers->firstneigh(list);
|
||||
const flt_t * restrict const special_lj = fc.special_lj;
|
||||
const FC_PACKED1_T * restrict const ljc12o = fc.ljc12o[0];
|
||||
const FC_PACKED2_T * restrict const lj34 = fc.lj34[0];
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const flt_t * _noalias const special_lj = fc.special_lj;
|
||||
const FC_PACKED1_T * _noalias const ljc12o = fc.ljc12o[0];
|
||||
const FC_PACKED2_T * _noalias const lj34 = fc.lj34[0];
|
||||
|
||||
const int ntypes = atom->ntypes + 1;
|
||||
const int eatom = this->eflag_atom;
|
||||
|
@ -153,8 +153,8 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
|
|||
x_size, q_size, ev_size, f_stride);
|
||||
|
||||
int tc;
|
||||
FORCE_T * restrict f_start;
|
||||
acc_t * restrict ev_global;
|
||||
FORCE_T * _noalias f_start;
|
||||
acc_t * _noalias ev_global;
|
||||
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
|
||||
const int nthreads = tc;
|
||||
int *overflow = fix->get_off_overflow_flag();
|
||||
|
@ -184,17 +184,17 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
|
|||
iifrom += astart;
|
||||
iito += astart;
|
||||
|
||||
FORCE_T * restrict const f = f_start - minlocal + (tid * f_stride);
|
||||
FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride);
|
||||
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
|
||||
|
||||
for (int i = iifrom; i < iito; ++i) {
|
||||
const int itype = x[i].w;
|
||||
|
||||
const int ptr_off = itype * ntypes;
|
||||
const FC_PACKED1_T * restrict const ljc12oi = ljc12o + ptr_off;
|
||||
const FC_PACKED2_T * restrict const lj34i = lj34 + ptr_off;
|
||||
const FC_PACKED1_T * _noalias const ljc12oi = ljc12o + ptr_off;
|
||||
const FC_PACKED2_T * _noalias const lj34i = lj34 + ptr_off;
|
||||
|
||||
const int * restrict const jlist = firstneigh + cnumneigh[i];
|
||||
const int * _noalias const jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
|
||||
acc_t fxtmp, fytmp, fztmp, fwtmp;
|
||||
|
@ -209,9 +209,11 @@ void PairLJCutIntel::eval(const int offload, const int vflag,
|
|||
if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
|
||||
}
|
||||
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
|
||||
sv0, sv1, sv2, sv3, sv4, sv5)
|
||||
#endif
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
flt_t forcelj, evdwl;
|
||||
forcelj = evdwl = (flt_t)0.0;
|
||||
|
|
|
@ -62,7 +62,7 @@ class PairLJCutIntel : public PairLJCut {
|
|||
typedef struct { flt_t cutsq, lj1, lj2, offset; } fc_packed1;
|
||||
typedef struct { flt_t lj3, lj4; } fc_packed2;
|
||||
|
||||
__declspec(align(64)) flt_t special_lj[4];
|
||||
_alignvar(flt_t special_lj[4],64);
|
||||
fc_packed1 **ljc12o;
|
||||
fc_packed2 **lj34;
|
||||
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "domain.h"
|
||||
#include "comm.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "force.h"
|
||||
#include "pair.h"
|
||||
#include "bond.h"
|
||||
|
@ -81,14 +82,9 @@ void VerletIntel::init()
|
|||
// set flags for what arrays to clear in force_clear()
|
||||
// need to clear additionals arrays if they exist
|
||||
|
||||
torqueflag = 0;
|
||||
torqueflag = extraflag = 0;
|
||||
if (atom->torque_flag) torqueflag = 1;
|
||||
erforceflag = 0;
|
||||
if (atom->erforce_flag) erforceflag = 1;
|
||||
e_flag = 0;
|
||||
if (atom->e_flag) e_flag = 1;
|
||||
rho_flag = 0;
|
||||
if (atom->rho_flag) rho_flag = 1;
|
||||
if (atom->avec->forceclearflag) extraflag = 1;
|
||||
|
||||
// orthogonal vs triclinic simulation box
|
||||
|
||||
|
@ -388,7 +384,7 @@ void VerletIntel::cleanup()
|
|||
|
||||
void VerletIntel::force_clear()
|
||||
{
|
||||
int i;
|
||||
size_t nbytes;
|
||||
|
||||
if (external_force_clear) return;
|
||||
|
||||
|
@ -396,19 +392,16 @@ void VerletIntel::force_clear()
|
|||
// if either newton flag is set, also include ghosts
|
||||
// when using threads always clear all forces.
|
||||
|
||||
if (neighbor->includegroup == 0) {
|
||||
int nall;
|
||||
if (force->newton) nall = atom->nlocal + atom->nghost;
|
||||
else nall = atom->nlocal;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
size_t nbytes = sizeof(double) * nall;
|
||||
if (neighbor->includegroup == 0) {
|
||||
nbytes = sizeof(double) * nlocal;
|
||||
if (force->newton) nbytes += sizeof(double) * atom->nghost;
|
||||
|
||||
if (nbytes) {
|
||||
memset(&(atom->f[0][0]),0,3*nbytes);
|
||||
if (torqueflag) memset(&(atom->torque[0][0]),0,3*nbytes);
|
||||
if (erforceflag) memset(&(atom->erforce[0]), 0, nbytes);
|
||||
if (e_flag) memset(&(atom->de[0]), 0, nbytes);
|
||||
if (rho_flag) memset(&(atom->drho[0]), 0, nbytes);
|
||||
memset(&atom->f[0][0],0,3*nbytes);
|
||||
if (torqueflag) memset(&atom->torque[0][0],0,3*nbytes);
|
||||
if (extraflag) atom->avec->force_clear(0,nbytes);
|
||||
}
|
||||
|
||||
// neighbor includegroup flag is set
|
||||
|
@ -416,70 +409,21 @@ void VerletIntel::force_clear()
|
|||
// if either newton flag is set, also include ghosts
|
||||
|
||||
} else {
|
||||
int nall = atom->nfirst;
|
||||
nbytes = sizeof(double) * atom->nfirst;
|
||||
|
||||
double **f = atom->f;
|
||||
for (i = 0; i < nall; i++) {
|
||||
f[i][0] = 0.0;
|
||||
f[i][1] = 0.0;
|
||||
f[i][2] = 0.0;
|
||||
}
|
||||
|
||||
if (torqueflag) {
|
||||
double **torque = atom->torque;
|
||||
for (i = 0; i < nall; i++) {
|
||||
torque[i][0] = 0.0;
|
||||
torque[i][1] = 0.0;
|
||||
torque[i][2] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
if (erforceflag) {
|
||||
double *erforce = atom->erforce;
|
||||
for (i = 0; i < nall; i++) erforce[i] = 0.0;
|
||||
}
|
||||
|
||||
if (e_flag) {
|
||||
double *de = atom->de;
|
||||
for (i = 0; i < nall; i++) de[i] = 0.0;
|
||||
}
|
||||
|
||||
if (rho_flag) {
|
||||
double *drho = atom->drho;
|
||||
for (i = 0; i < nall; i++) drho[i] = 0.0;
|
||||
if (nbytes) {
|
||||
memset(&atom->f[0][0],0,3*nbytes);
|
||||
if (torqueflag) memset(&atom->torque[0][0],0,3*nbytes);
|
||||
if (extraflag) atom->avec->force_clear(0,nbytes);
|
||||
}
|
||||
|
||||
if (force->newton) {
|
||||
nall = atom->nlocal + atom->nghost;
|
||||
nbytes = sizeof(double) * atom->nghost;
|
||||
|
||||
for (i = atom->nlocal; i < nall; i++) {
|
||||
f[i][0] = 0.0;
|
||||
f[i][1] = 0.0;
|
||||
f[i][2] = 0.0;
|
||||
}
|
||||
|
||||
if (torqueflag) {
|
||||
double **torque = atom->torque;
|
||||
for (i = atom->nlocal; i < nall; i++) {
|
||||
torque[i][0] = 0.0;
|
||||
torque[i][1] = 0.0;
|
||||
torque[i][2] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
if (erforceflag) {
|
||||
double *erforce = atom->erforce;
|
||||
for (i = atom->nlocal; i < nall; i++) erforce[i] = 0.0;
|
||||
}
|
||||
|
||||
if (e_flag) {
|
||||
double *de = atom->de;
|
||||
for (i = 0; i < nall; i++) de[i] = 0.0;
|
||||
}
|
||||
|
||||
if (rho_flag) {
|
||||
double *drho = atom->drho;
|
||||
for (i = 0; i < nall; i++) drho[i] = 0.0;
|
||||
if (nbytes) {
|
||||
memset(&atom->f[nlocal][0],0,3*nbytes);
|
||||
if (torqueflag) memset(&atom->torque[nlocal][0],0,3*nbytes);
|
||||
if (extraflag) atom->avec->force_clear(nlocal,nbytes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,8 +39,7 @@ class VerletIntel : public Integrate {
|
|||
|
||||
protected:
|
||||
int triclinic; // 0 if domain is orthog, 1 if triclinic
|
||||
int torqueflag,erforceflag;
|
||||
int e_flag,rho_flag;
|
||||
int torqueflag,extraflag;
|
||||
|
||||
virtual void force_clear();
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
|
|
|
@ -52,6 +52,9 @@ VerletSplitIntel::VerletSplitIntel(LAMMPS *lmp, int narg, char **arg) :
|
|||
if (universe->procs_per_world[0] % universe->procs_per_world[1])
|
||||
error->universe_all(FLERR,"Verlet/split requires Rspace partition "
|
||||
"size be multiple of Kspace partition size");
|
||||
if (comm->style != 0)
|
||||
error->universe_all(FLERR,"Verlet/split can only currently be used with "
|
||||
"comm_style brick");
|
||||
|
||||
// master = 1 for Rspace procs, 0 for Kspace procs
|
||||
|
||||
|
@ -214,6 +217,9 @@ VerletSplitIntel::~VerletSplitIntel()
|
|||
|
||||
void VerletSplitIntel::init()
|
||||
{
|
||||
if (comm->style != 0)
|
||||
error->universe_all(FLERR,"Verlet/split can only currently be used with "
|
||||
"comm_style brick");
|
||||
if (!force->kspace && comm->me == 0)
|
||||
error->warning(FLERR,"No Kspace calculation with verlet/split");
|
||||
|
||||
|
|
|
@ -48,11 +48,6 @@ static double mypow(double x, int n) {
|
|||
}
|
||||
|
||||
typedef struct { double x,y,z; } dbl3_t;
|
||||
#if defined(__GNUC__)
|
||||
#define _noalias __restrict
|
||||
#else
|
||||
#define _noalias
|
||||
#endif
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
|
|
|
@ -22,11 +22,6 @@
|
|||
using namespace LAMMPS_NS;
|
||||
|
||||
typedef struct { double x,y,z; } dbl3_t;
|
||||
#if defined(__GNUC__)
|
||||
#define _noalias __restrict
|
||||
#else
|
||||
#define _noalias
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
enforce PBC and modify box image flags for each atom
|
||||
|
|
|
@ -33,11 +33,6 @@ using namespace FixConst;
|
|||
enum{NOBIAS,BIAS};
|
||||
|
||||
typedef struct { double x,y,z; } dbl3_t;
|
||||
#if defined(__GNUC__)
|
||||
#define _noalias __restrict
|
||||
#else
|
||||
#define _noalias
|
||||
#endif
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
|
|
|
@ -34,11 +34,6 @@ enum{ISO,ANISO,TRICLINIC};
|
|||
#define TILTMAX 1.5
|
||||
|
||||
typedef struct { double x,y,z; } dbl3_t;
|
||||
#if defined(__GNUC__)
|
||||
#define _noalias __restrict
|
||||
#else
|
||||
#define _noalias
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
change box size
|
||||
|
|
|
@ -31,11 +31,6 @@ enum{NOBIAS,BIAS};
|
|||
#define INERTIA 0.4 // moment of inertia prefactor for sphere
|
||||
|
||||
typedef struct { double x,y,z; } dbl3_t;
|
||||
#if defined(__GNUC__)
|
||||
#define _noalias __restrict
|
||||
#else
|
||||
#define _noalias
|
||||
#endif
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
|
|
|
@ -19,11 +19,6 @@ using namespace LAMMPS_NS;
|
|||
using namespace FixConst;
|
||||
|
||||
typedef struct { double x,y,z; } dbl3_t;
|
||||
#if defined(__GNUC__)
|
||||
#define _noalias __restrict
|
||||
#else
|
||||
#define _noalias
|
||||
#endif
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
|
|
|
@ -34,11 +34,6 @@ using namespace FixConst;
|
|||
enum{NO_REMAP,X_REMAP,V_REMAP}; // same as fix_deform.cpp
|
||||
|
||||
typedef struct { double x,y,z; } dbl3_t;
|
||||
#if defined(__GNUC__)
|
||||
#define _noalias __restrict
|
||||
#else
|
||||
#define _noalias
|
||||
#endif
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
|
|
|
@ -48,11 +48,6 @@ enum{ISO,ANISO,TRICLINIC}; // same as in FixRigid
|
|||
#define EINERTIA 0.4 // moment of inertia prefactor for ellipsoid
|
||||
|
||||
typedef struct { double x,y,z; } dbl3_t;
|
||||
#if defined(__GNUC__)
|
||||
#define _noalias __restrict
|
||||
#else
|
||||
#define _noalias
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
perform preforce velocity Verlet integration
|
||||
|
|
|
@ -42,11 +42,6 @@ enum{SINGLE,MOLECULE,GROUP}; // same as in FixRigid
|
|||
#define EINERTIA 0.4 // moment of inertia prefactor for ellipsoid
|
||||
|
||||
typedef struct { double x,y,z; } dbl3_t;
|
||||
#if defined(__GNUC__)
|
||||
#define _noalias __restrict
|
||||
#else
|
||||
#define _noalias
|
||||
#endif
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
|
|
|
@ -42,11 +42,6 @@ using namespace MathConst;
|
|||
enum{FULL_BODY,INITIAL,FINAL,FORCE_TORQUE,VCM_ANGMOM,XCM_MASS,ITENSOR,DOF};
|
||||
|
||||
typedef struct { double x,y,z; } dbl3_t;
|
||||
#if defined(__GNUC__)
|
||||
#define _noalias __restrict
|
||||
#else
|
||||
#define _noalias
|
||||
#endif
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
|
|
|
@ -195,14 +195,4 @@ typedef struct { int a,b,c,d,t; } int5_t;
|
|||
|
||||
}
|
||||
|
||||
#ifdef _noalias
|
||||
#undef _noalias
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#define _noalias __restrict
|
||||
#else
|
||||
#define _noalias
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -167,6 +167,36 @@ typedef int bigint;
|
|||
|
||||
}
|
||||
|
||||
// preprocessor macros for compiler specific settings
|
||||
// clear previous definitions to avoid redefinition warning
|
||||
|
||||
#ifdef _alignvar
|
||||
#undef _alignvar
|
||||
#endif
|
||||
#ifdef _noalias
|
||||
#undef _noalias
|
||||
#endif
|
||||
|
||||
// define stack variable alignment
|
||||
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#define _alignvar(expr,val) __declspec(align(val)) expr
|
||||
#elif defined(__GNUC__)
|
||||
#define _alignvar(expr,val) expr __attribute((aligned(val)))
|
||||
#else
|
||||
#define _alignvar(expr,val) expr
|
||||
#endif
|
||||
|
||||
// declaration to lift aliasing restrictions
|
||||
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#define _noalias restrict
|
||||
#elif defined(__GNUC__)
|
||||
#define _noalias __restrict
|
||||
#else
|
||||
#define _noalias
|
||||
#endif
|
||||
|
||||
// settings to enable LAMMPS to build under Windows
|
||||
|
||||
#ifdef _WIN32
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
#define ATOBIGINT _atoi64
|
||||
|
||||
#define pclose _pclose
|
||||
#define __restrict__ __restrict
|
||||
|
||||
// the following functions ared defined to get rid of
|
||||
// 'ambiguous call to overloaded function' error in VSS for mismathched type arguments
|
||||
|
|
Loading…
Reference in New Issue