git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6625 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp 2011-08-08 19:51:22 +00:00
parent 7bf99d0e58
commit 3c1cefdba0
9 changed files with 37 additions and 42 deletions

View File

@ -83,7 +83,11 @@ NOTE: PPPM acceleration can only be run on GPUs with compute capability>=1.1.
when attempting to run PPPM on a GPU with compute capability 1.0.
NOTE: Double precision is only supported on certain GPUs (with
compute capability>=1.3).
compute capability>=1.3). If you compile the GPU library for
a GPU with compute capability 1.1 and 1.2, then only single
precistion FFTs are supported, i.e. LAMMPS has to be compiled
with -DFFT_SINGLE. For details on configuring FFT support in
LAMMPS, see http://lammps.sandia.gov/doc/Section_start.html#2_2_4
NOTE: For Tesla and other graphics cards with compute capability>=1.3,
make sure that -arch=sm_13 is set on the CUDA_ARCH line.
@ -96,9 +100,8 @@ NOTE: The gayberne/gpu pair style will only be installed if the ASPHERE
NOTE: The cg/cmm/gpu and cg/cmm/coul/long/gpu pair styles will only be
installed if the USER-CG-CMM package has been installed.
NOTE: The lj/cut/coul/long/gpu, cg/cmm/coul/long/gpu, pppm/gpu/single, and
pppm/gpu/double styles will only be installed if the KSPACE package has
been installed.
NOTE: The lj/cut/coul/long/gpu, cg/cmm/coul/long/gpu, and pppm/gpu styles
will only be installed if the KSPACE package has been installed.
NOTE: The lj/charmm/coul/long will only be installed if the MOLECULE package
has been installed.

View File

@ -200,10 +200,8 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
prefactor = qqrd2e * qtmp*fetch_q(j,q_)/r;
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
} else {
} else
forcecoul = (numtyp)0.0;
prefactor = (numtyp)0.0;
}
force = (force_lj + forcecoul) * r2inv;
@ -212,7 +210,8 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
f.z+=delz*force;
if (eflag>0) {
e_coul += prefactor*(_erfc-factor_coul);
if (rsq < cut_coulsq)
e_coul += prefactor*(_erfc-factor_coul);
if (rsq < lj1[mtype].y) {
energy += factor_lj*inv1*(lj3[mtype].y*inv2-lj3[mtype].z)-
lj3[mtype].w;
@ -394,10 +393,8 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
prefactor = qqrd2e * qtmp*fetch_q(j,q_)/r;
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
} else {
} else
forcecoul = (numtyp)0.0;
prefactor = (numtyp)0.0;
}
force = (force_lj + forcecoul) * r2inv;
@ -406,7 +403,8 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
f.z+=delz*force;
if (eflag>0) {
e_coul += prefactor*(_erfc-factor_coul);
if (rsq < cut_coulsq)
e_coul += prefactor*(_erfc-factor_coul);
if (rsq < lj1[mtype].y) {
energy += factor_lj*inv1*(lj3[mtype].y*inv2-lj3[mtype].z)-
lj3[mtype].w;

View File

@ -204,10 +204,8 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
prefactor = qqrd2e * qtmp*fetch_q(j,q_)/r;
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
} else {
} else
forcecoul = (numtyp)0.0;
prefactor = (numtyp)0.0;
}
force = (force_lj + forcecoul) * r2inv;
@ -216,7 +214,8 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
f.z+=delz*force;
if (eflag>0) {
e_coul += prefactor*(_erfc-factor_coul);
if (rsq < cut_coulsq)
e_coul += prefactor*(_erfc-factor_coul);
if (rsq < cut_ljsq) {
numtyp e=r6inv*(lj1[mtype].z*r6inv-lj1[mtype].w);
if (rsq > cut_lj_innersq)
@ -407,10 +406,8 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp2 *ljd_in,
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
prefactor = qqrd2e * qtmp*fetch_q(j,q_)/r;
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
} else {
} else
forcecoul = (numtyp)0.0;
prefactor = (numtyp)0.0;
}
force = (force_lj + forcecoul) * r2inv;
@ -419,7 +416,8 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp2 *ljd_in,
f.z+=delz*force;
if (eflag>0) {
e_coul += prefactor*(_erfc-factor_coul);
if (rsq < cut_coulsq)
e_coul += prefactor*(_erfc-factor_coul);
if (rsq < cut_ljsq) {
numtyp e=lj3-lj4;
if (rsq > cut_lj_innersq)

View File

@ -191,10 +191,8 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
prefactor = qqrd2e * qtmp*fetch_q(j,q_)/r;
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
} else {
} else
forcecoul = (numtyp)0.0;
prefactor = (numtyp)0.0;
}
force = (force_lj + forcecoul) * r2inv;
@ -203,7 +201,8 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
f.z+=delz*force;
if (eflag>0) {
e_coul += prefactor*(_erfc-factor_coul);
if (rsq < cut_coulsq)
e_coul += prefactor*(_erfc-factor_coul);
if (rsq < lj1[mtype].w) {
numtyp e=r6inv*(lj3[mtype].x*r3inv-lj3[mtype].y);
energy+=factor_lj*(e-lj3[mtype].z);
@ -379,10 +378,8 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
prefactor = qqrd2e * qtmp*fetch_q(j,q_)/r;
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
} else {
} else
forcecoul = (numtyp)0.0;
prefactor = (numtyp)0.0;
}
force = (force_lj + forcecoul) * r2inv;
@ -391,7 +388,8 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
f.z+=delz*force;
if (eflag>0) {
e_coul += prefactor*(_erfc-factor_coul);
if (rsq < cut_coulsq)
e_coul += prefactor*(_erfc-factor_coul);
if (rsq < lj1[mtype].w) {
numtyp e=r6inv*(lj3[mtype].x*r3inv-lj3[mtype].y);
energy+=factor_lj*(e-lj3[mtype].z);

View File

@ -191,10 +191,8 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
prefactor = qqrd2e * qtmp*fetch_q(j,q_)/r;
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
} else {
} else
forcecoul = (numtyp)0.0;
prefactor = (numtyp)0.0;
}
force = (force_lj + forcecoul) * r2inv;
@ -203,7 +201,8 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
f.z+=delz*force;
if (eflag>0) {
e_coul += prefactor*(_erfc-factor_coul);
if (rsq < cut_coulsq)
e_coul += prefactor*(_erfc-factor_coul);
if (rsq < lj1[mtype].w) {
numtyp e=r6inv*(lj3[mtype].x*r6inv-lj3[mtype].y);
energy+=factor_lj*(e-lj3[mtype].z);
@ -377,10 +376,8 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
prefactor = qqrd2e * qtmp*fetch_q(j,q_)/r;
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
} else {
} else
forcecoul = (numtyp)0.0;
prefactor = (numtyp)0.0;
}
force = (force_lj + forcecoul) * r2inv;
@ -389,7 +386,8 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
f.z+=delz*force;
if (eflag>0) {
e_coul += prefactor*(_erfc-factor_coul);
if (rsq < cut_coulsq)
e_coul += prefactor*(_erfc-factor_coul);
if (rsq < lj1[mtype].w) {
numtyp e=r6inv*(lj3[mtype].x*r6inv-lj3[mtype].y);
energy+=factor_lj*(e-lj3[mtype].z);

View File

@ -144,7 +144,7 @@ __kernel void particle_map(__global numtyp4 *x_, __global numtyp *q_,
int i=nz*nlocal_y*nlocal_x+ny*nlocal_x+nx;
int old=atom_add(counts+i, 1);
if (old==max_atoms) {
if (old>=max_atoms) {
*error=2;
atom_add(counts+i, -1);
} else

View File

@ -51,7 +51,7 @@ grdtyp * PPPMGPUMemoryT::init(const int nlocal, const int nall, FILE *_screen,
const int order, const int nxlo_out,
const int nylo_out, const int nzlo_out,
const int nxhi_out, const int nyhi_out,
const int nzhi_out, double **rho_coeff,
const int nzhi_out, grdtyp **rho_coeff,
grdtyp **vd_brick, const double slab_volfactor,
const int nx_pppm, const int ny_pppm,
const int nz_pppm, int &flag) {
@ -123,7 +123,7 @@ grdtyp * PPPMGPUMemoryT::init(const int nlocal, const int nall, FILE *_screen,
int numel=order*( order/2 - n2lo + 1 );
success=success && (d_rho_coeff.alloc(numel,*ucl_device,UCL_READ_ONLY)==
UCL_SUCCESS);
UCL_H_Vec<double> view;
UCL_H_Vec<grdtyp> view;
view.view(rho_coeff[0]+n2lo,numel,*ucl_device);
ucl_copy(d_rho_coeff,view,true);
_max_bytes+=d_rho_coeff.row_bytes();

View File

@ -46,7 +46,7 @@ class PPPMGPUMemory {
grdtyp * init(const int nlocal, const int nall, FILE *screen, const int order,
const int nxlo_out, const int nylo_out, const int nzlo_out,
const int nxhi_out, const int nyhi_out, const int nzhi_out,
double **rho_coeff, grdtyp **vd_brick,
grdtyp **rho_coeff, grdtyp **vd_brick,
const double slab_volfactor, const int nx_pppm,
const int ny_pppm, const int nz_pppm, int &success);

View File

@ -34,7 +34,7 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall,
FILE *screen, const int order, const int nxlo_out,
const int nylo_out, const int nzlo_out,
const int nxhi_out, const int nyhi_out,
const int nzhi_out, double **rho_coeff,
const int nzhi_out, grdtyp **rho_coeff,
grdtyp **vd_brick, const double slab_volfactor,
const int nx_pppm, const int ny_pppm, const int nz_pppm,
int &success) {
@ -95,7 +95,7 @@ float * pppm_gpu_init_f(const int nlocal, const int nall, FILE *screen,
const int order, const int nxlo_out,
const int nylo_out, const int nzlo_out,
const int nxhi_out, const int nyhi_out,
const int nzhi_out, double **rho_coeff,
const int nzhi_out, float **rho_coeff,
float **vd_brick, const double slab_volfactor,
const int nx_pppm, const int ny_pppm, const int nz_pppm,
int &success) {