git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7426 f3b2605a-c512-4ea7-a41b-209d697bcdaa
parent bbdb300778
commit aac0e5699f
@@ -53,9 +53,10 @@ grdtyp * PPPMT::init(const int nlocal, const int nall, FILE *_screen,
                      const int nzhi_out, grdtyp **rho_coeff,
                      grdtyp **vd_brick, const double slab_volfactor,
                      const int nx_pppm, const int ny_pppm,
-                     const int nz_pppm, int &flag) {
+                     const int nz_pppm, const bool split, int &flag) {
   _max_bytes=10;
   screen=_screen;
+  _kspace_split=split;
   bool success=true;
 
   flag=device->init(*ans,nlocal,nall);
@@ -359,10 +360,10 @@ void PPPMT::interp(const grdtyp qqrd2e_scale) {
   time_interp.stop();
 
   ans->copy_answers(false,false,false,false);
-  device->add_ans_object(ans);
+  if (_kspace_split==false)
+    device->add_ans_object(ans);
 }
 
 
 template <class numtyp, class acctyp, class grdtyp, class grdtyp4>
 double PPPMT::host_memory_usage() const {
   return device->atom.host_memory_usage()+
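
The two hunks above thread a new split flag into the grid initializer, cache it in _kspace_split, and use it so that interp() no longer hands its answer object to the device when the k-space work runs on its own partition. A minimal standalone sketch of that guard pattern follows; the Device and Answer types are simplified stand-ins for the library's classes, not its real API.

#include <cstdio>
#include <vector>

// Simplified stand-ins for the library's device and answer types
// (hypothetical; only the registration pattern mirrors the diff).
struct Answer { };

struct Device {
  std::vector<Answer*> pending;            // merged into per-step results later
  void add_ans_object(Answer *a) { pending.push_back(a); }
};

struct PPPM {
  Device *device;
  Answer *ans;
  bool _kspace_split;

  void init(Device *d, Answer *a, const bool split) {
    device=d; ans=a;
    _kspace_split=split;                   // stored as in PPPMT::init above
  }

  void interp() {
    // ... force interpolation from the mesh would happen here ...
    if (_kspace_split==false)              // the guard this commit adds:
      device->add_ans_object(ans);         // auto-merge only when not split
  }
};

int main() {
  Device d; Answer a; PPPM p;
  p.init(&d,&a,/*split=*/true);
  p.interp();
  std::printf("pending answers: %zu\n",d.pending.size()); // 0 in split mode
  return 0;
}
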
@@ -48,7 +48,8 @@ class PPPM {
             const int nxhi_out, const int nyhi_out, const int nzhi_out,
             grdtyp **rho_coeff, grdtyp **vd_brick,
             const double slab_volfactor, const int nx_pppm,
-            const int ny_pppm, const int nz_pppm, int &success);
+            const int ny_pppm, const int nz_pppm, const bool split,
+            int &success);
 
   /// Check if there is enough storage for atom arrays and realloc if not
   /** \param success set to false if insufficient memory **/
@@ -174,7 +175,7 @@ class PPPM {
   UCL_Texture q_tex;
 
  protected:
-  bool _allocated, _compiled, _precompute_done;
+  bool _allocated, _compiled, _precompute_done, _kspace_split;
   int _block_size, _block_pencils, _pencil_size, _max_brick_atoms, _max_atoms;
   double _max_bytes, _max_an_bytes;
   double _cpu_idle_time;
@@ -36,7 +36,7 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall,
                        const int nzhi_out, grdtyp **rho_coeff,
                        grdtyp **vd_brick, const double slab_volfactor,
                        const int nx_pppm, const int ny_pppm, const int nz_pppm,
-                       int &success) {
+                       const bool split, int &success) {
   pppm.clear(0.0);
   int first_gpu=pppm.device->first_device();
   int last_gpu=pppm.device->last_device();
@@ -60,7 +60,7 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall,
   if (world_me==0)
     host_brick=pppm.init(nlocal,nall,screen,order,nxlo_out,nylo_out,nzlo_out,
                          nxhi_out,nyhi_out,nzhi_out,rho_coeff,vd_brick,
-                         slab_volfactor,nx_pppm,ny_pppm,nz_pppm,success);
+                         slab_volfactor,nx_pppm,ny_pppm,nz_pppm,split,success);
 
   pppm.device->world_barrier();
   if (message)
@@ -79,7 +79,7 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall,
       host_brick=pppm.init(nlocal,nall,screen,order,nxlo_out,nylo_out,
                            nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff,
                            vd_brick,slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
-                           success);
+                           split,success);
 
     pppm.device->gpu_barrier();
     if (message)
@@ -97,11 +97,12 @@ float * pppm_gpu_init_f(const int nlocal, const int nall, FILE *screen,
                         const int nzhi_out, float **rho_coeff,
                         float **vd_brick, const double slab_volfactor,
                         const int nx_pppm, const int ny_pppm, const int nz_pppm,
-                        int &success) {
+                        const bool split, int &success) {
   float *b=pppm_gpu_init(PPPMF,nlocal,nall,screen,order,nxlo_out,nylo_out,
                          nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff,vd_brick,
-                         slab_volfactor,nx_pppm,ny_pppm,nz_pppm,success);
-  PPPMF.device->set_single_precompute(&PPPMF);
+                         slab_volfactor,nx_pppm,ny_pppm,nz_pppm,split,success);
+  if (split==false)
+    PPPMF.device->set_single_precompute(&PPPMF);
   return b;
 }
 
@@ -118,13 +119,19 @@ int pppm_gpu_spread_f(const int ago, const int nlocal, const int nall,
 }
 
 void pppm_gpu_interp_f(const float qqrd2e_scale) {
-  return PPPMF.interp(qqrd2e_scale);
+  PPPMF.interp(qqrd2e_scale);
 }
 
 double pppm_gpu_bytes_f() {
   return PPPMF.host_memory_usage();
 }
 
+void pppm_gpu_forces_f(double **f) {
+  double etmp;
+  PPPMF.atom->data_unavail();
+  PPPMF.ans->get_answers(f,NULL,NULL,NULL,NULL,etmp);
+}
+
 double * pppm_gpu_init_d(const int nlocal, const int nall, FILE *screen,
                          const int order, const int nxlo_out,
                          const int nylo_out, const int nzlo_out,
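
Besides threading split through, the hunk above fixes pppm_gpu_interp_f, which returned the result of a void call, and adds a pppm_gpu_forces_f entry point so the host can fetch k-space forces itself when the answer object is no longer queued automatically. Below is a compilable sketch of how a caller might pair the two; the stub bodies and the call ordering are assumptions, since the real caller is LAMMPS' pppm/gpu machinery.

#include <cstdio>

// Prototypes as changed/added by this commit.
void pppm_gpu_interp_f(const float qqrd2e_scale);
void pppm_gpu_forces_f(double **f);

// Stub bodies so the sketch compiles standalone; the library versions
// forward to PPPMF.interp() and PPPMF.ans->get_answers().
void pppm_gpu_interp_f(const float) { }
void pppm_gpu_forces_f(double **) { }

int main() {
  double row[3]={0.0,0.0,0.0};
  double *f[1]={row};
  pppm_gpu_interp_f(1.0f);  // now void: the stray `return` was dropped
  pppm_gpu_forces_f(f);     // split mode: fetch k-space forces explicitly
  std::printf("f[0] = %g %g %g\n",f[0][0],f[0][1],f[0][2]);
  return 0;
}
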
@@ -132,12 +139,13 @@ double * pppm_gpu_init_d(const int nlocal, const int nall, FILE *screen,
                          const int nzhi_out, double **rho_coeff,
                          double **vd_brick, const double slab_volfactor,
                          const int nx_pppm, const int ny_pppm,
-                         const int nz_pppm, int &success) {
+                         const int nz_pppm, const bool split, int &success) {
   double *b=pppm_gpu_init(PPPMD,nlocal,nall,screen,order,nxlo_out,nylo_out,
                           nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff,
                           vd_brick,slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
-                          success);
-  PPPMF.device->set_double_precompute(&PPPMD);
+                          split,success);
+  if (split==false)
+    PPPMD.device->set_double_precompute(&PPPMD);
   return b;
 }
 
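
Note that besides the split guard, this hunk fixes a copy-paste slip: the double-precision initializer previously registered its precompute hook through the single-precision object PPPMF. A reduced model of the guarded self-registration follows; the Device stand-in is hypothetical, only the names PPPMD, split, and set_double_precompute come from the diff.

#include <cassert>

struct PPPMDouble;
struct Device {
  PPPMDouble *dbl_hook=nullptr;
  void set_double_precompute(PPPMDouble *p) { dbl_hook=p; }
};
struct PPPMDouble { Device *device; };

int main() {
  Device dev;
  PPPMDouble PPPMD{&dev};
  const bool split=true;
  if (split==false)                  // no hook when k-space is split off
    PPPMD.device->set_double_precompute(&PPPMD);
  assert(dev.dbl_hook==nullptr);     // split mode: nothing registered
  return 0;
}
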
@@ -154,10 +162,16 @@ int pppm_gpu_spread_d(const int ago, const int nlocal, const int nall,
 }
 
 void pppm_gpu_interp_d(const double qqrd2e_scale) {
-  return PPPMD.interp(qqrd2e_scale);
+  PPPMD.interp(qqrd2e_scale);
 }
 
 double pppm_gpu_bytes_d() {
   return PPPMD.host_memory_usage();
 }
 
+void pppm_gpu_forces_d(double **f) {
+  double etmp;
+  PPPMD.atom->data_unavail();
+  PPPMD.ans->get_answers(f,NULL,NULL,NULL,NULL,etmp);
+}
+