forked from lijiext/lammps
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6625 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
7bf99d0e58
commit
3c1cefdba0
|
@ -83,7 +83,11 @@ NOTE: PPPM acceleration can only be run on GPUs with compute capability>=1.1.
|
|||
when attempting to run PPPM on a GPU with compute capability 1.0.
|
||||
|
||||
NOTE: Double precision is only supported on certain GPUs (with
|
||||
compute capability>=1.3).
|
||||
compute capability>=1.3). If you compile the GPU library for
|
||||
a GPU with compute capability 1.1 or 1.2, then only single
|
||||
precision FFTs are supported, i.e. LAMMPS has to be compiled
|
||||
with -DFFT_SINGLE. For details on configuring FFT support in
|
||||
LAMMPS, see http://lammps.sandia.gov/doc/Section_start.html#2_2_4
|
||||
|
||||
NOTE: For Tesla and other graphics cards with compute capability>=1.3,
|
||||
make sure that -arch=sm_13 is set on the CUDA_ARCH line.
|
||||
|
@ -96,9 +100,8 @@ NOTE: The gayberne/gpu pair style will only be installed if the ASPHERE
|
|||
NOTE: The cg/cmm/gpu and cg/cmm/coul/long/gpu pair styles will only be
|
||||
installed if the USER-CG-CMM package has been installed.
|
||||
|
||||
NOTE: The lj/cut/coul/long/gpu, cg/cmm/coul/long/gpu, pppm/gpu/single, and
|
||||
pppm/gpu/double styles will only be installed if the KSPACE package has
|
||||
been installed.
|
||||
NOTE: The lj/cut/coul/long/gpu, cg/cmm/coul/long/gpu, and pppm/gpu styles
|
||||
will only be installed if the KSPACE package has been installed.
|
||||
|
||||
NOTE: The lj/charmm/coul/long pair style will only be installed if the MOLECULE package
|
||||
has been installed.
|
||||
|
|
|
@ -200,10 +200,8 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
prefactor = qqrd2e * qtmp*fetch_q(j,q_)/r;
|
||||
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
|
||||
} else {
|
||||
} else
|
||||
forcecoul = (numtyp)0.0;
|
||||
prefactor = (numtyp)0.0;
|
||||
}
|
||||
|
||||
force = (force_lj + forcecoul) * r2inv;
|
||||
|
||||
|
@ -212,7 +210,8 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
f.z+=delz*force;
|
||||
|
||||
if (eflag>0) {
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
if (rsq < cut_coulsq)
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
if (rsq < lj1[mtype].y) {
|
||||
energy += factor_lj*inv1*(lj3[mtype].y*inv2-lj3[mtype].z)-
|
||||
lj3[mtype].w;
|
||||
|
@ -394,10 +393,8 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
prefactor = qqrd2e * qtmp*fetch_q(j,q_)/r;
|
||||
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
|
||||
} else {
|
||||
} else
|
||||
forcecoul = (numtyp)0.0;
|
||||
prefactor = (numtyp)0.0;
|
||||
}
|
||||
|
||||
force = (force_lj + forcecoul) * r2inv;
|
||||
|
||||
|
@ -406,7 +403,8 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
f.z+=delz*force;
|
||||
|
||||
if (eflag>0) {
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
if (rsq < cut_coulsq)
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
if (rsq < lj1[mtype].y) {
|
||||
energy += factor_lj*inv1*(lj3[mtype].y*inv2-lj3[mtype].z)-
|
||||
lj3[mtype].w;
|
||||
|
|
|
@ -204,10 +204,8 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
prefactor = qqrd2e * qtmp*fetch_q(j,q_)/r;
|
||||
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
|
||||
} else {
|
||||
} else
|
||||
forcecoul = (numtyp)0.0;
|
||||
prefactor = (numtyp)0.0;
|
||||
}
|
||||
|
||||
force = (force_lj + forcecoul) * r2inv;
|
||||
|
||||
|
@ -216,7 +214,8 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
f.z+=delz*force;
|
||||
|
||||
if (eflag>0) {
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
if (rsq < cut_coulsq)
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
if (rsq < cut_ljsq) {
|
||||
numtyp e=r6inv*(lj1[mtype].z*r6inv-lj1[mtype].w);
|
||||
if (rsq > cut_lj_innersq)
|
||||
|
@ -407,10 +406,8 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp2 *ljd_in,
|
|||
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
prefactor = qqrd2e * qtmp*fetch_q(j,q_)/r;
|
||||
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
|
||||
} else {
|
||||
} else
|
||||
forcecoul = (numtyp)0.0;
|
||||
prefactor = (numtyp)0.0;
|
||||
}
|
||||
|
||||
force = (force_lj + forcecoul) * r2inv;
|
||||
|
||||
|
@ -419,7 +416,8 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp2 *ljd_in,
|
|||
f.z+=delz*force;
|
||||
|
||||
if (eflag>0) {
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
if (rsq < cut_coulsq)
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
if (rsq < cut_ljsq) {
|
||||
numtyp e=lj3-lj4;
|
||||
if (rsq > cut_lj_innersq)
|
||||
|
|
|
@ -191,10 +191,8 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
prefactor = qqrd2e * qtmp*fetch_q(j,q_)/r;
|
||||
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
|
||||
} else {
|
||||
} else
|
||||
forcecoul = (numtyp)0.0;
|
||||
prefactor = (numtyp)0.0;
|
||||
}
|
||||
|
||||
force = (force_lj + forcecoul) * r2inv;
|
||||
|
||||
|
@ -203,7 +201,8 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
f.z+=delz*force;
|
||||
|
||||
if (eflag>0) {
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
if (rsq < cut_coulsq)
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
if (rsq < lj1[mtype].w) {
|
||||
numtyp e=r6inv*(lj3[mtype].x*r3inv-lj3[mtype].y);
|
||||
energy+=factor_lj*(e-lj3[mtype].z);
|
||||
|
@ -379,10 +378,8 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
prefactor = qqrd2e * qtmp*fetch_q(j,q_)/r;
|
||||
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
|
||||
} else {
|
||||
} else
|
||||
forcecoul = (numtyp)0.0;
|
||||
prefactor = (numtyp)0.0;
|
||||
}
|
||||
|
||||
force = (force_lj + forcecoul) * r2inv;
|
||||
|
||||
|
@ -391,7 +388,8 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
f.z+=delz*force;
|
||||
|
||||
if (eflag>0) {
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
if (rsq < cut_coulsq)
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
if (rsq < lj1[mtype].w) {
|
||||
numtyp e=r6inv*(lj3[mtype].x*r3inv-lj3[mtype].y);
|
||||
energy+=factor_lj*(e-lj3[mtype].z);
|
||||
|
|
|
@ -191,10 +191,8 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
prefactor = qqrd2e * qtmp*fetch_q(j,q_)/r;
|
||||
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
|
||||
} else {
|
||||
} else
|
||||
forcecoul = (numtyp)0.0;
|
||||
prefactor = (numtyp)0.0;
|
||||
}
|
||||
|
||||
force = (force_lj + forcecoul) * r2inv;
|
||||
|
||||
|
@ -203,7 +201,8 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
f.z+=delz*force;
|
||||
|
||||
if (eflag>0) {
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
if (rsq < cut_coulsq)
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
if (rsq < lj1[mtype].w) {
|
||||
numtyp e=r6inv*(lj3[mtype].x*r6inv-lj3[mtype].y);
|
||||
energy+=factor_lj*(e-lj3[mtype].z);
|
||||
|
@ -377,10 +376,8 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
prefactor = qqrd2e * qtmp*fetch_q(j,q_)/r;
|
||||
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
|
||||
} else {
|
||||
} else
|
||||
forcecoul = (numtyp)0.0;
|
||||
prefactor = (numtyp)0.0;
|
||||
}
|
||||
|
||||
force = (force_lj + forcecoul) * r2inv;
|
||||
|
||||
|
@ -389,7 +386,8 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
f.z+=delz*force;
|
||||
|
||||
if (eflag>0) {
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
if (rsq < cut_coulsq)
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
if (rsq < lj1[mtype].w) {
|
||||
numtyp e=r6inv*(lj3[mtype].x*r6inv-lj3[mtype].y);
|
||||
energy+=factor_lj*(e-lj3[mtype].z);
|
||||
|
|
|
@ -144,7 +144,7 @@ __kernel void particle_map(__global numtyp4 *x_, __global numtyp *q_,
|
|||
|
||||
int i=nz*nlocal_y*nlocal_x+ny*nlocal_x+nx;
|
||||
int old=atom_add(counts+i, 1);
|
||||
if (old==max_atoms) {
|
||||
if (old>=max_atoms) {
|
||||
*error=2;
|
||||
atom_add(counts+i, -1);
|
||||
} else
|
||||
|
|
|
@ -51,7 +51,7 @@ grdtyp * PPPMGPUMemoryT::init(const int nlocal, const int nall, FILE *_screen,
|
|||
const int order, const int nxlo_out,
|
||||
const int nylo_out, const int nzlo_out,
|
||||
const int nxhi_out, const int nyhi_out,
|
||||
const int nzhi_out, double **rho_coeff,
|
||||
const int nzhi_out, grdtyp **rho_coeff,
|
||||
grdtyp **vd_brick, const double slab_volfactor,
|
||||
const int nx_pppm, const int ny_pppm,
|
||||
const int nz_pppm, int &flag) {
|
||||
|
@ -123,7 +123,7 @@ grdtyp * PPPMGPUMemoryT::init(const int nlocal, const int nall, FILE *_screen,
|
|||
int numel=order*( order/2 - n2lo + 1 );
|
||||
success=success && (d_rho_coeff.alloc(numel,*ucl_device,UCL_READ_ONLY)==
|
||||
UCL_SUCCESS);
|
||||
UCL_H_Vec<double> view;
|
||||
UCL_H_Vec<grdtyp> view;
|
||||
view.view(rho_coeff[0]+n2lo,numel,*ucl_device);
|
||||
ucl_copy(d_rho_coeff,view,true);
|
||||
_max_bytes+=d_rho_coeff.row_bytes();
|
||||
|
|
|
@ -46,7 +46,7 @@ class PPPMGPUMemory {
|
|||
grdtyp * init(const int nlocal, const int nall, FILE *screen, const int order,
|
||||
const int nxlo_out, const int nylo_out, const int nzlo_out,
|
||||
const int nxhi_out, const int nyhi_out, const int nzhi_out,
|
||||
double **rho_coeff, grdtyp **vd_brick,
|
||||
grdtyp **rho_coeff, grdtyp **vd_brick,
|
||||
const double slab_volfactor, const int nx_pppm,
|
||||
const int ny_pppm, const int nz_pppm, int &success);
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall,
|
|||
FILE *screen, const int order, const int nxlo_out,
|
||||
const int nylo_out, const int nzlo_out,
|
||||
const int nxhi_out, const int nyhi_out,
|
||||
const int nzhi_out, double **rho_coeff,
|
||||
const int nzhi_out, grdtyp **rho_coeff,
|
||||
grdtyp **vd_brick, const double slab_volfactor,
|
||||
const int nx_pppm, const int ny_pppm, const int nz_pppm,
|
||||
int &success) {
|
||||
|
@ -95,7 +95,7 @@ float * pppm_gpu_init_f(const int nlocal, const int nall, FILE *screen,
|
|||
const int order, const int nxlo_out,
|
||||
const int nylo_out, const int nzlo_out,
|
||||
const int nxhi_out, const int nyhi_out,
|
||||
const int nzhi_out, double **rho_coeff,
|
||||
const int nzhi_out, float **rho_coeff,
|
||||
float **vd_brick, const double slab_volfactor,
|
||||
const int nx_pppm, const int ny_pppm, const int nz_pppm,
|
||||
int &success) {
|
||||
|
|
Loading…
Reference in New Issue