forked from lijiext/lammps
228 lines
6.5 KiB
Plaintext
228 lines
6.5 KiB
Plaintext
/* ----------------------------------------------------------------------
|
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
|
|
|
Original Version:
|
|
http://lammps.sandia.gov, Sandia National Laboratories
|
|
Steve Plimpton, sjplimp@sandia.gov
|
|
|
|
See the README file in the top-level LAMMPS directory.
|
|
|
|
-----------------------------------------------------------------------
|
|
|
|
USER-CUDA Package and associated modifications:
|
|
https://sourceforge.net/projects/lammpscuda/
|
|
|
|
Christian Trott, christian.trott@tu-ilmenau.de
|
|
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
|
Theoretical Physics II, University of Technology Ilmenau, Germany
|
|
|
|
See the README file in the USER-CUDA directory.
|
|
|
|
This software is distributed under the GNU General Public License.
|
|
------------------------------------------------------------------------- */
|
|
|
|
|
|
__global__ void PairGranHookeCuda_Kernel(int eflag, int vflag, int eflag_atom, int vflag_atom, int** firstneight, int* binned_id
|
|
, F_FLOAT kn, F_FLOAT gamman, F_FLOAT gammat, F_FLOAT xmu)
|
|
{
|
|
ENERGY_FLOAT evdwl = ENERGY_F(0.0);
|
|
|
|
ENERGY_FLOAT* sharedE;
|
|
ENERGY_FLOAT* sharedV;
|
|
|
|
if(eflag || eflag_atom) {
|
|
sharedE = &sharedmem[threadIdx.x];
|
|
sharedV = &sharedmem[0];
|
|
sharedE[0] = ENERGY_F(0.0);
|
|
sharedV += blockDim.x;
|
|
}
|
|
|
|
if(vflag || vflag_atom) {
|
|
sharedV += threadIdx.x;
|
|
sharedV[0 * blockDim.x] = ENERGY_F(0.0);
|
|
sharedV[1 * blockDim.x] = ENERGY_F(0.0);
|
|
sharedV[2 * blockDim.x] = ENERGY_F(0.0);
|
|
sharedV[3 * blockDim.x] = ENERGY_F(0.0);
|
|
sharedV[4 * blockDim.x] = ENERGY_F(0.0);
|
|
sharedV[5 * blockDim.x] = ENERGY_F(0.0);
|
|
}
|
|
|
|
int ii = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
|
|
|
|
MYEMUDBG(if(ii == 0) printf("# CUDA: PairGranHookeCuda_Kernel: -- no binning --\n");)
|
|
|
|
X_FLOAT xtmp, ytmp, ztmp;
|
|
|
|
X_FLOAT4 myxtype;
|
|
V_FLOAT4 myvradius, ovradius;
|
|
F_FLOAT fxtmp, fytmp, fztmp, torquextmp, torqueytmp, torqueztmp;
|
|
F_FLOAT delx, dely, delz;
|
|
F_FLOAT radi, radj, radsum, r, rsqinv;
|
|
F_FLOAT vr1, vr2, vr3, vnnr, vn1, vn2, vn3, vt1, vt2, vt3;
|
|
F_FLOAT wr1, wr2, wr3;
|
|
F_FLOAT vtr1, vtr2, vtr3, vrel;
|
|
F_FLOAT meff, damp, ccel, tor1, tor2, tor3;
|
|
F_FLOAT fn, fs, ft, fs1, fs2, fs3;
|
|
|
|
int jnum = 0;
|
|
int i, j;
|
|
int* jlist;
|
|
|
|
if(ii < _inum) {
|
|
i = _ilist[ii];
|
|
|
|
myxtype = fetchXType(i);
|
|
myvradius = fetchVRadius(i);
|
|
|
|
xtmp = myxtype.x;
|
|
ytmp = myxtype.y;
|
|
ztmp = myxtype.z;
|
|
radi = myvradius.w;
|
|
|
|
fxtmp = F_F(0.0);
|
|
fytmp = F_F(0.0);
|
|
fztmp = F_F(0.0);
|
|
torquextmp = F_F(0.0);
|
|
torqueytmp = F_F(0.0);
|
|
torqueztmp = F_F(0.0);
|
|
|
|
jnum = _numneigh[i];
|
|
|
|
jlist = &_neighbors[i];
|
|
}
|
|
|
|
__syncthreads();
|
|
|
|
for(int jj = 0; jj < jnum; jj++) {
|
|
if(ii < _inum)
|
|
if(jj < jnum) {
|
|
j = jlist[jj * _nlocal];
|
|
|
|
myxtype = fetchXType(j);
|
|
ovradius = fetchVRadius(j);
|
|
|
|
delx = xtmp - myxtype.x;
|
|
dely = ytmp - myxtype.y;
|
|
delz = ztmp - myxtype.z;
|
|
|
|
radj = ovradius.w;
|
|
radsum = radi + radj;
|
|
|
|
const F_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
|
|
|
if(rsq < radsum * radsum) {
|
|
const F_FLOAT rinv = _RSQRT_(rsq);
|
|
r = F_F(1.0) / rinv;
|
|
rsqinv = F_F(1.0) / rsq;
|
|
|
|
// relative translational velocity
|
|
|
|
vr1 = myvradius.x - ovradius.x;
|
|
vr2 = myvradius.y - ovradius.y;
|
|
vr3 = myvradius.z - ovradius.z;
|
|
|
|
// normal component
|
|
|
|
vnnr = vr1 * delx + vr2 * dely + vr3 * delz;
|
|
vn1 = delx * vnnr * rsqinv;
|
|
vn2 = dely * vnnr * rsqinv;
|
|
vn3 = delz * vnnr * rsqinv;
|
|
|
|
// tangential component
|
|
|
|
vt1 = vr1 - vn1;
|
|
vt2 = vr2 - vn2;
|
|
vt3 = vr3 - vn3;
|
|
|
|
// relative rotational velocity
|
|
V_FLOAT4 omegarmass_i = fetchOmegaRmass(i);
|
|
V_FLOAT4 omegarmass_j = fetchOmegaRmass(j);
|
|
|
|
wr1 = (radi * omegarmass_i.x + radj * omegarmass_j.x) * rinv;
|
|
wr2 = (radi * omegarmass_i.y + radj * omegarmass_j.y) * rinv;
|
|
wr3 = (radi * omegarmass_i.z + radj * omegarmass_j.z) * rinv;
|
|
|
|
meff = omegarmass_i.w * omegarmass_j.w / (omegarmass_i.w + omegarmass_j.w);
|
|
|
|
if(_mask[i] & _freeze_group_bit) meff = omegarmass_j.w;
|
|
|
|
if(_mask[j] & _freeze_group_bit) meff = omegarmass_i.w;
|
|
|
|
damp = meff * gamman * vnnr * rsqinv;
|
|
ccel = kn * (radsum - r) * rinv - damp;
|
|
|
|
vtr1 = vt1 - (delz * wr2 - dely * wr3);
|
|
vtr2 = vt2 - (delx * wr3 - delz * wr1);
|
|
vtr3 = vt3 - (dely * wr1 - delx * wr2);
|
|
vrel = vtr1 * vtr1 + vtr2 * vtr2 + vtr3 * vtr3;
|
|
vrel = _SQRT_(vrel);
|
|
|
|
fn = xmu * fabs(ccel * r);
|
|
fs = meff * gammat * vrel;
|
|
ft = (vrel != F_F(0.0)) ? MIN(fn, fs) / vrel : F_F(0.0);
|
|
|
|
fs1 = -ft * vtr1;
|
|
fs2 = -ft * vtr2;
|
|
fs3 = -ft * vtr3;
|
|
|
|
F_FLOAT dxfp, dyfp, dzfp;
|
|
fxtmp += dxfp = delx * ccel + fs1;
|
|
fytmp += dyfp = dely * ccel + fs2;
|
|
fztmp += dzfp = delz * ccel + fs3;
|
|
|
|
tor1 = rinv * (dely * fs3 - delz * fs2);
|
|
tor2 = rinv * (delz * fs1 - delx * fs3);
|
|
tor3 = rinv * (delx * fs2 - dely * fs1);
|
|
|
|
torquextmp -= radi * tor1;
|
|
torqueytmp -= radi * tor2;
|
|
torqueztmp -= radi * tor3;
|
|
|
|
if(vflag) {
|
|
sharedV[0 * blockDim.x] += delx * dxfp;
|
|
sharedV[1 * blockDim.x] += dely * dyfp;
|
|
sharedV[2 * blockDim.x] += delz * dzfp;
|
|
sharedV[3 * blockDim.x] += delx * dyfp;
|
|
sharedV[4 * blockDim.x] += delx * dzfp;
|
|
sharedV[5 * blockDim.x] += dely * dzfp;
|
|
}
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
__syncthreads();
|
|
|
|
if(ii < _inum) {
|
|
F_FLOAT* my_f = _f + i;
|
|
*my_f += fxtmp;
|
|
my_f += _nmax;
|
|
*my_f += fytmp;
|
|
my_f += _nmax;
|
|
*my_f += fztmp;
|
|
F_FLOAT* my_torque = _torque + i;
|
|
*my_torque += torquextmp;
|
|
my_torque += _nmax;
|
|
*my_torque += torqueytmp;
|
|
my_torque += _nmax;
|
|
*my_torque += torqueztmp;
|
|
}
|
|
|
|
__syncthreads();
|
|
|
|
if(eflag) sharedE[0] = evdwl;
|
|
|
|
if(eflag_atom && i < _nlocal) _eatom[i] += evdwl;
|
|
|
|
if(vflag_atom && i < _nlocal) {
|
|
_vatom[i] += ENERGY_F(0.5) * sharedV[0 * blockDim.x];
|
|
_vatom[i + _nmax] += ENERGY_F(0.5) * sharedV[1 * blockDim.x];
|
|
_vatom[i + 2 * _nmax] += ENERGY_F(0.5) * sharedV[2 * blockDim.x];
|
|
_vatom[i + 3 * _nmax] += ENERGY_F(0.5) * sharedV[3 * blockDim.x];
|
|
_vatom[i + 4 * _nmax] += ENERGY_F(0.5) * sharedV[4 * blockDim.x];
|
|
_vatom[i + 5 * _nmax] += ENERGY_F(0.5) * sharedV[5 * blockDim.x];
|
|
}
|
|
|
|
if(vflag || eflag) PairVirialCompute_A_Kernel(eflag, vflag, 0);
|
|
}
|