forked from lijiext/lammps
Updated pair coul/long/cs and born/coul/long/cs; updated gpu neighbor builds to support core-shell styles where r2 can be tiny.
This commit is contained in:
parent
6842a527e0
commit
70a7b37614
|
@ -9,7 +9,7 @@
|
|||
// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
// __________________________________________________________________________
|
||||
//
|
||||
// begin :
|
||||
// begin : June 2018
|
||||
// email : trung.nguyen@northwestern.edu
|
||||
// ***************************************************************************/
|
||||
|
||||
|
@ -29,15 +29,19 @@ texture<int2> q_tex;
|
|||
#define q_tex q_
|
||||
#endif
|
||||
|
||||
#define B0 (numtyp)-0.1335096380159268
|
||||
#define B1 (numtyp)-2.57839507e-1
|
||||
#define B2 (numtyp)-1.37203639e-1
|
||||
#define B3 (numtyp)-8.88822059e-3
|
||||
#define B4 (numtyp)-5.80844129e-3
|
||||
#define B5 (numtyp)1.14652755e-1
|
||||
#define EPSILON (numtyp)1.0e-20
|
||||
#define EPS_EWALD (numtyp)1.0e-6
|
||||
#define EPS_EWALD_SQR (numtyp)1.0e-12
|
||||
// Note: EWALD_P is different from that in lal_preprocessor.h
|
||||
// acctyp is needed for these parameters
|
||||
#define CS_EWALD_P (acctyp)9.95473818e-1
|
||||
#define B0 (acctyp)-0.1335096380159268
|
||||
#define B1 (acctyp)-2.57839507e-1
|
||||
#define B2 (acctyp)-1.37203639e-1
|
||||
#define B3 (acctyp)-8.88822059e-3
|
||||
#define B4 (acctyp)-5.80844129e-3
|
||||
#define B5 (acctyp)1.14652755e-1
|
||||
|
||||
#define EPSILON (acctyp)(1.0e-20)
|
||||
#define EPS_EWALD (acctyp)(1.0e-6)
|
||||
#define EPS_EWALD_SQR (acctyp)(1.0e-12)
|
||||
|
||||
__kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff1,
|
||||
|
@ -105,39 +109,37 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_,
|
|||
|
||||
int mtype=itype*lj_types+jtype;
|
||||
if (rsq<cutsq_sigma[mtype].x) { // cutsq
|
||||
numtyp forcecoul, forceborn, force, r6inv, prefactor, _erfc;
|
||||
numtyp rexp = (numtyp)0.0;
|
||||
numtyp forcecoul,forceborn,force,r6inv,prefactor,_erfc,rexp;
|
||||
|
||||
rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
|
||||
numtyp r2inv = ucl_recip(rsq);
|
||||
|
||||
if (rsq < cut_coulsq) {
|
||||
rsq += EPSILON;
|
||||
|
||||
numtyp r2inv = ucl_recip(rsq);
|
||||
numtyp r = ucl_rsqrt(r2inv);
|
||||
numtyp r = ucl_sqrt(rsq);
|
||||
fetch(prefactor,j,q_tex);
|
||||
prefactor *= qqrd2e * qtmp;
|
||||
if (factor_coul<(numtyp)1.0) {
|
||||
numtyp grij = g_ewald * (r+EPS_EWALD);
|
||||
numtyp expm2 = ucl_exp(-grij*grij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
|
||||
numtyp u = (numtyp)1.0 - t;
|
||||
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
||||
prefactor /= (r+EPS_EWALD);
|
||||
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
|
||||
// Additionally r2inv needs to be accordingly modified since the later
|
||||
// scaling of the overall force shall be consistent
|
||||
r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
|
||||
forcecoul *= r2inv;
|
||||
} else {
|
||||
numtyp grij = g_ewald * r;
|
||||
numtyp expm2 = ucl_exp(-grij*grij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
|
||||
numtyp u = (numtyp)1.0 - t;
|
||||
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
||||
prefactor /= r;
|
||||
forcecoul = prefactor*(_erfc + EWALD_F*grij*expm2);
|
||||
forcecoul *= r2inv;
|
||||
}
|
||||
} else forcecoul = (numtyp)0.0;
|
||||
|
||||
numtyp r2inv = ucl_recip(rsq);
|
||||
if (rsq < cutsq_sigma[mtype].y) { // cut_ljsq
|
||||
numtyp r = ucl_sqrt(rsq);
|
||||
rexp = ucl_exp((cutsq_sigma[mtype].z-r)*coeff1[mtype].x);
|
||||
|
@ -146,7 +148,7 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_,
|
|||
+ coeff1[mtype].w*r2inv*r6inv)*factor_lj;
|
||||
} else forceborn = (numtyp)0.0;
|
||||
|
||||
force = forceborn*r2inv + forcecoul;
|
||||
force = (forcecoul + forceborn) * r2inv;
|
||||
|
||||
f.x+=delx*force;
|
||||
f.y+=dely*force;
|
||||
|
@ -154,9 +156,9 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_,
|
|||
|
||||
if (eflag>0) {
|
||||
if (rsq < cut_coulsq) {
|
||||
e_coul += prefactor*_erfc;
|
||||
if (factor_coul<(numtyp)1.0)
|
||||
e_coul -= ((numtyp)1.0-factor_coul)*prefactor;
|
||||
numtyp e = prefactor*_erfc;
|
||||
if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
|
||||
e_coul += e;
|
||||
}
|
||||
if (rsq < cutsq_sigma[mtype].y) {
|
||||
numtyp e=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv
|
||||
|
@ -248,39 +250,38 @@ __kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_,
|
|||
numtyp rsq = delx*delx+dely*dely+delz*delz;
|
||||
|
||||
if (rsq<cutsq_sigma[mtype].x) { // cutsq
|
||||
numtyp forcecoul, forceborn, force, r6inv, prefactor, _erfc;
|
||||
numtyp rexp = (numtyp)0.0;
|
||||
numtyp forcecoul,forceborn,force,r6inv,prefactor,_erfc,rexp;
|
||||
|
||||
rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
|
||||
numtyp r2inv = ucl_recip(rsq);
|
||||
|
||||
if (rsq < cut_coulsq) {
|
||||
rsq += EPSILON;
|
||||
|
||||
numtyp r2inv = ucl_recip(rsq);
|
||||
numtyp r = ucl_rsqrt(r2inv);
|
||||
numtyp r = ucl_sqrt(rsq);
|
||||
fetch(prefactor,j,q_tex);
|
||||
prefactor *= qqrd2e * qtmp;
|
||||
if (factor_coul<(numtyp)1.0) {
|
||||
numtyp grij = g_ewald * (r+EPS_EWALD);
|
||||
numtyp expm2 = ucl_exp(-grij*grij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
|
||||
numtyp u = (numtyp)1.0 - t;
|
||||
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
||||
prefactor /= (r+EPS_EWALD);
|
||||
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
|
||||
// Additionally r2inv needs to be accordingly modified since the later
|
||||
// scaling of the overall force shall be consistent
|
||||
r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
|
||||
forcecoul *= r2inv;
|
||||
} else {
|
||||
|
||||
numtyp grij = g_ewald * r;
|
||||
numtyp expm2 = ucl_exp(-grij*grij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
|
||||
numtyp u = (numtyp)1.0 - t;
|
||||
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
||||
prefactor /= r;
|
||||
forcecoul = prefactor*(_erfc + EWALD_F*grij*expm2);
|
||||
forcecoul *= r2inv;
|
||||
}
|
||||
} else forcecoul = (numtyp)0.0;
|
||||
|
||||
numtyp r2inv = ucl_recip(rsq);
|
||||
if (rsq < cutsq_sigma[mtype].y) { // cut_ljsq
|
||||
numtyp r = ucl_sqrt(rsq);
|
||||
rexp = ucl_exp((cutsq_sigma[mtype].z-r)*coeff1[mtype].x);
|
||||
|
@ -289,7 +290,7 @@ __kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_,
|
|||
+ coeff1[mtype].w*r2inv*r6inv)*factor_lj;
|
||||
} else forceborn = (numtyp)0.0;
|
||||
|
||||
force = forceborn*r2inv + forcecoul;
|
||||
force = (forcecoul + forceborn) * r2inv;
|
||||
|
||||
f.x+=delx*force;
|
||||
f.y+=dely*force;
|
||||
|
@ -297,9 +298,9 @@ __kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_,
|
|||
|
||||
if (eflag>0) {
|
||||
if (rsq < cut_coulsq) {
|
||||
e_coul += prefactor*_erfc;
|
||||
if (factor_coul<(numtyp)1.0)
|
||||
e_coul -= ((numtyp)1.0-factor_coul)*prefactor;
|
||||
numtyp e = prefactor*_erfc;
|
||||
if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
|
||||
e_coul += e;
|
||||
}
|
||||
if (rsq < cutsq_sigma[mtype].y) {
|
||||
numtyp e=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv
|
||||
|
|
|
@ -29,6 +29,20 @@ texture<int2> q_tex;
|
|||
#define q_tex q_
|
||||
#endif
|
||||
|
||||
// Note: EWALD_P is different from that in lal_preprocessor.h
|
||||
// acctyp is needed for these parameters
|
||||
#define CS_EWALD_P (acctyp)9.95473818e-1
|
||||
#define B0 (acctyp)-0.1335096380159268
|
||||
#define B1 (acctyp)-2.57839507e-1
|
||||
#define B2 (acctyp)-1.37203639e-1
|
||||
#define B3 (acctyp)-8.88822059e-3
|
||||
#define B4 (acctyp)-5.80844129e-3
|
||||
#define B5 (acctyp)1.14652755e-1
|
||||
|
||||
#define EPSILON (acctyp)(1.0e-20)
|
||||
#define EPS_EWALD (acctyp)(1.0e-6)
|
||||
#define EPS_EWALD_SQR (acctyp)(1.0e-12)
|
||||
|
||||
#if (ARCH < 300)
|
||||
|
||||
#define store_answers_lq(f, e_coul, virial, ii, inum, tid, \
|
||||
|
@ -123,16 +137,6 @@ texture<int2> q_tex;
|
|||
|
||||
#endif
|
||||
|
||||
#define B0 (numtyp)-0.1335096380159268
|
||||
#define B1 (numtyp)-2.57839507e-1
|
||||
#define B2 (numtyp)-1.37203639e-1
|
||||
#define B3 (numtyp)-8.88822059e-3
|
||||
#define B4 (numtyp)-5.80844129e-3
|
||||
#define B5 (numtyp)1.14652755e-1
|
||||
#define EPSILON (numtyp)1.0e-20
|
||||
#define EPS_EWALD (numtyp)1.0e-6
|
||||
#define EPS_EWALD_SQR (numtyp)1.0e-12
|
||||
|
||||
__kernel void k_coul_long_cs(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp *restrict scale,
|
||||
const int lj_types,
|
||||
|
@ -191,7 +195,7 @@ __kernel void k_coul_long_cs(const __global numtyp4 *restrict x_,
|
|||
|
||||
int mtype=itype*lj_types+jtype;
|
||||
if (rsq < cut_coulsq) {
|
||||
rsq += EPSILON;
|
||||
rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
|
||||
|
||||
numtyp force,prefactor,_erfc;
|
||||
numtyp r2inv = ucl_recip(rsq);
|
||||
|
@ -201,17 +205,19 @@ __kernel void k_coul_long_cs(const __global numtyp4 *restrict x_,
|
|||
if (factor_coul<(numtyp)1.0) {
|
||||
numtyp grij = g_ewald * (r+EPS_EWALD);
|
||||
numtyp expm2 = ucl_exp(-grij*grij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
|
||||
numtyp u = (numtyp)1.0 - t;
|
||||
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
||||
prefactor /= (r+EPS_EWALD);
|
||||
force = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
|
||||
// Additionally r2inv needs to be accordingly modified since the later
|
||||
// scaling of the overall force shall be consistent
|
||||
r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
|
||||
force *= r2inv;
|
||||
} else {
|
||||
numtyp grij = g_ewald * r;
|
||||
numtyp expm2 = ucl_exp(-grij*grij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
|
||||
numtyp u = (numtyp)1.0 - t;
|
||||
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
||||
prefactor /= r;
|
||||
|
@ -224,9 +230,9 @@ __kernel void k_coul_long_cs(const __global numtyp4 *restrict x_,
|
|||
f.z+=delz*force;
|
||||
|
||||
if (eflag>0) {
|
||||
e_coul += prefactor*_erfc;
|
||||
if (factor_coul<(numtyp)1.0)
|
||||
e_coul -= ((numtyp)1.0-factor_coul)*prefactor;
|
||||
numtyp e = prefactor*_erfc;
|
||||
if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
|
||||
e_coul += e;
|
||||
}
|
||||
if (vflag>0) {
|
||||
virial[0] += delx*delx*force;
|
||||
|
@ -304,7 +310,7 @@ __kernel void k_coul_long_cs_fast(const __global numtyp4 *restrict x_,
|
|||
numtyp rsq = delx*delx+dely*dely+delz*delz;
|
||||
|
||||
if (rsq < cut_coulsq) {
|
||||
rsq += EPSILON;
|
||||
rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
|
||||
|
||||
numtyp force,prefactor,_erfc;
|
||||
numtyp r2inv = ucl_recip(rsq);
|
||||
|
@ -314,32 +320,34 @@ __kernel void k_coul_long_cs_fast(const __global numtyp4 *restrict x_,
|
|||
if (factor_coul<(numtyp)1.0) {
|
||||
numtyp grij = g_ewald * (r+EPS_EWALD);
|
||||
numtyp expm2 = ucl_exp(-grij*grij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
|
||||
numtyp u = (numtyp)1.0 - t;
|
||||
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
||||
prefactor /= (r+EPS_EWALD);
|
||||
force = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
|
||||
// Additionally r2inv needs to be accordingly modified since the later
|
||||
// scaling of the overall force shall be consistent
|
||||
r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
|
||||
force *= r2inv;
|
||||
} else {
|
||||
numtyp grij = g_ewald * r;
|
||||
numtyp expm2 = ucl_exp(-grij*grij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
|
||||
numtyp u = (numtyp)1.0 - t;
|
||||
_erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
|
||||
prefactor /= r;
|
||||
force = prefactor * (_erfc + EWALD_F*grij*expm2);
|
||||
force *= r2inv;
|
||||
}
|
||||
|
||||
force *= r2inv;
|
||||
|
||||
f.x+=delx*force;
|
||||
f.y+=dely*force;
|
||||
f.z+=delz*force;
|
||||
|
||||
if (eflag>0) {
|
||||
e_coul += prefactor*_erfc;
|
||||
if (factor_coul<(numtyp)1.0)
|
||||
e_coul -= ((numtyp)1.0-factor_coul)*prefactor;
|
||||
numtyp e = prefactor*_erfc;
|
||||
if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
|
||||
e_coul += e;
|
||||
}
|
||||
if (vflag>0) {
|
||||
virial[0] += delx*delx*force;
|
||||
|
|
|
@ -118,24 +118,24 @@ __kernel void transpose(__global tagint *restrict out,
|
|||
const __global tagint *restrict in,
|
||||
int columns_in, int rows_in)
|
||||
{
|
||||
__local tagint block[BLOCK_CELL_2D][BLOCK_CELL_2D+1];
|
||||
__local tagint block[BLOCK_CELL_2D][BLOCK_CELL_2D+1];
|
||||
|
||||
unsigned ti=THREAD_ID_X;
|
||||
unsigned tj=THREAD_ID_Y;
|
||||
unsigned bi=BLOCK_ID_X;
|
||||
unsigned bj=BLOCK_ID_Y;
|
||||
unsigned ti=THREAD_ID_X;
|
||||
unsigned tj=THREAD_ID_Y;
|
||||
unsigned bi=BLOCK_ID_X;
|
||||
unsigned bj=BLOCK_ID_Y;
|
||||
|
||||
unsigned i=bi*BLOCK_CELL_2D+ti;
|
||||
unsigned j=bj*BLOCK_CELL_2D+tj;
|
||||
if ((i<columns_in) && (j<rows_in))
|
||||
block[tj][ti]=in[j*columns_in+i];
|
||||
unsigned i=bi*BLOCK_CELL_2D+ti;
|
||||
unsigned j=bj*BLOCK_CELL_2D+tj;
|
||||
if ((i<columns_in) && (j<rows_in))
|
||||
block[tj][ti]=in[j*columns_in+i];
|
||||
|
||||
__syncthreads();
|
||||
__syncthreads();
|
||||
|
||||
i=bj*BLOCK_CELL_2D+ti;
|
||||
j=bi*BLOCK_CELL_2D+tj;
|
||||
if ((i<rows_in) && (j<columns_in))
|
||||
out[j*rows_in+i] = block[ti][tj];
|
||||
i=bj*BLOCK_CELL_2D+ti;
|
||||
j=bi*BLOCK_CELL_2D+tj;
|
||||
if ((i<rows_in) && (j<columns_in))
|
||||
out[j*rows_in+i] = block[ti][tj];
|
||||
}
|
||||
|
||||
__kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_,
|
||||
|
@ -191,7 +191,7 @@ __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_,
|
|||
nbor_list[pid_i]=pid_i;
|
||||
} else {
|
||||
stride=0;
|
||||
neigh_counts=host_numj+pid_i-inum;
|
||||
neigh_counts=host_numj+pid_i-inum;
|
||||
neigh_list=host_nbor_list+(pid_i-inum)*neigh_bin_size;
|
||||
}
|
||||
|
||||
|
@ -232,7 +232,7 @@ __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_,
|
|||
diff.z = atom_i.z - pos_sh[j].z;
|
||||
|
||||
r2 = diff.x*diff.x + diff.y*diff.y + diff.z*diff.z;
|
||||
if (r2 < cell_size*cell_size && r2 > 1e-5) {
|
||||
if (r2 < cell_size*cell_size && pid_j != pid_i) { // && r2 > 1e-5
|
||||
cnt++;
|
||||
if (cnt <= neigh_bin_size) {
|
||||
*neigh_list = pid_j;
|
||||
|
@ -243,8 +243,8 @@ __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_,
|
|||
}
|
||||
}
|
||||
}
|
||||
__syncthreads();
|
||||
} // for (k)
|
||||
__syncthreads();
|
||||
} // for (k)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue