git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7426 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp 2012-01-06 17:13:10 +00:00
parent bbdb300778
commit aac0e5699f
3 changed files with 32 additions and 16 deletions

View File

@ -53,9 +53,10 @@ grdtyp * PPPMT::init(const int nlocal, const int nall, FILE *_screen,
const int nzhi_out, grdtyp **rho_coeff,
grdtyp **vd_brick, const double slab_volfactor,
const int nx_pppm, const int ny_pppm,
const int nz_pppm, int &flag) {
const int nz_pppm, const bool split, int &flag) {
_max_bytes=10;
screen=_screen;
_kspace_split=split;
bool success=true;
flag=device->init(*ans,nlocal,nall);
@ -359,10 +360,10 @@ void PPPMT::interp(const grdtyp qqrd2e_scale) {
time_interp.stop();
ans->copy_answers(false,false,false,false);
device->add_ans_object(ans);
if (_kspace_split==false)
device->add_ans_object(ans);
}
template <class numtyp, class acctyp, class grdtyp, class grdtyp4>
double PPPMT::host_memory_usage() const {
return device->atom.host_memory_usage()+

View File

@ -48,7 +48,8 @@ class PPPM {
const int nxhi_out, const int nyhi_out, const int nzhi_out,
grdtyp **rho_coeff, grdtyp **vd_brick,
const double slab_volfactor, const int nx_pppm,
const int ny_pppm, const int nz_pppm, int &success);
const int ny_pppm, const int nz_pppm, const bool split,
int &success);
/// Check if there is enough storage for atom arrays and realloc if not
/** \param success set to false if insufficient memory **/
@ -174,7 +175,7 @@ class PPPM {
UCL_Texture q_tex;
protected:
bool _allocated, _compiled, _precompute_done;
bool _allocated, _compiled, _precompute_done, _kspace_split;
int _block_size, _block_pencils, _pencil_size, _max_brick_atoms, _max_atoms;
double _max_bytes, _max_an_bytes;
double _cpu_idle_time;

View File

@ -36,7 +36,7 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall,
const int nzhi_out, grdtyp **rho_coeff,
grdtyp **vd_brick, const double slab_volfactor,
const int nx_pppm, const int ny_pppm, const int nz_pppm,
int &success) {
const bool split, int &success) {
pppm.clear(0.0);
int first_gpu=pppm.device->first_device();
int last_gpu=pppm.device->last_device();
@ -60,7 +60,7 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall,
if (world_me==0)
host_brick=pppm.init(nlocal,nall,screen,order,nxlo_out,nylo_out,nzlo_out,
nxhi_out,nyhi_out,nzhi_out,rho_coeff,vd_brick,
slab_volfactor,nx_pppm,ny_pppm,nz_pppm,success);
slab_volfactor,nx_pppm,ny_pppm,nz_pppm,split,success);
pppm.device->world_barrier();
if (message)
@ -79,7 +79,7 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall,
host_brick=pppm.init(nlocal,nall,screen,order,nxlo_out,nylo_out,
nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff,
vd_brick,slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
success);
split,success);
pppm.device->gpu_barrier();
if (message)
@ -97,11 +97,12 @@ float * pppm_gpu_init_f(const int nlocal, const int nall, FILE *screen,
const int nzhi_out, float **rho_coeff,
float **vd_brick, const double slab_volfactor,
const int nx_pppm, const int ny_pppm, const int nz_pppm,
int &success) {
const bool split, int &success) {
float *b=pppm_gpu_init(PPPMF,nlocal,nall,screen,order,nxlo_out,nylo_out,
nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff,vd_brick,
slab_volfactor,nx_pppm,ny_pppm,nz_pppm,success);
PPPMF.device->set_single_precompute(&PPPMF);
slab_volfactor,nx_pppm,ny_pppm,nz_pppm,split,success);
if (split==false)
PPPMF.device->set_single_precompute(&PPPMF);
return b;
}
@ -118,13 +119,19 @@ int pppm_gpu_spread_f(const int ago, const int nlocal, const int nall,
}
/// Forward the interpolation request to the PPPMF instance.
/** \param qqrd2e_scale value passed unchanged to PPPMF.interp() **/
void pppm_gpu_interp_f(const float qqrd2e_scale) {
  // A single call only: the earlier form returned the (void) call expression
  // and then repeated the same call in a statement that could never execute.
  PPPMF.interp(qqrd2e_scale);
}
/// Report the value returned by PPPMF.host_memory_usage().
double pppm_gpu_bytes_f() {
  const double usage = PPPMF.host_memory_usage();
  return usage;
}
/// Fetch answers from the PPPMF instance into the caller's force array.
/** \param f array handed to PPPMF.ans->get_answers() as its first argument
  *
  * The four pointer arguments of get_answers() are passed as NULL and the
  * trailing scalar is a local that is thrown away when this function returns. **/
void pppm_gpu_forces_f(double **f) {
  // Flag the atom data as unavailable before asking for the answers;
  // the order of these two calls is kept exactly as in the original.
  PPPMF.atom->data_unavail();

  double unused_scalar;  // placeholder for the last get_answers() argument
  PPPMF.ans->get_answers(f, NULL, NULL, NULL, NULL, unused_scalar);
}
double * pppm_gpu_init_d(const int nlocal, const int nall, FILE *screen,
const int order, const int nxlo_out,
const int nylo_out, const int nzlo_out,
@ -132,12 +139,13 @@ double * pppm_gpu_init_d(const int nlocal, const int nall, FILE *screen,
const int nzhi_out, double **rho_coeff,
double **vd_brick, const double slab_volfactor,
const int nx_pppm, const int ny_pppm,
const int nz_pppm, int &success) {
const int nz_pppm, const bool split, int &success) {
double *b=pppm_gpu_init(PPPMD,nlocal,nall,screen,order,nxlo_out,nylo_out,
nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff,
vd_brick,slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
success);
PPPMF.device->set_double_precompute(&PPPMD);
split,success);
if (split==false)
PPPMD.device->set_double_precompute(&PPPMD);
return b;
}
@ -154,10 +162,16 @@ int pppm_gpu_spread_d(const int ago, const int nlocal, const int nall,
}
/// Forward the interpolation request to the PPPMD instance.
/** \param qqrd2e_scale value passed unchanged to PPPMD.interp() **/
void pppm_gpu_interp_d(const double qqrd2e_scale) {
  // A single call only: the earlier form returned the (void) call expression
  // and then repeated the same call in a statement that could never execute.
  PPPMD.interp(qqrd2e_scale);
}
/// Report the value returned by PPPMD.host_memory_usage().
double pppm_gpu_bytes_d() {
  const double usage = PPPMD.host_memory_usage();
  return usage;
}
/// Fetch answers from the PPPMD instance into the caller's force array.
/** \param f array handed to PPPMD.ans->get_answers() as its first argument
  *
  * The four pointer arguments of get_answers() are passed as NULL and the
  * trailing scalar is a local that is thrown away when this function returns. **/
void pppm_gpu_forces_d(double **f) {
  // Flag the atom data as unavailable before asking for the answers;
  // the order of these two calls is kept exactly as in the original.
  PPPMD.atom->data_unavail();

  double unused_scalar;  // placeholder for the last get_answers() argument
  PPPMD.ans->get_answers(f, NULL, NULL, NULL, NULL, unused_scalar);
}