forked from lijiext/lammps
Separate the computation of hneigh into another kernel
Simplify the main GPU kernel and add another kernel 'k_pair_reneigh'. It works good on GTX1070 (Pascal), but still there is a problem with non-deterministic results on Volta. I reimplement BaseCharge::compute methods in the child class LJ_TIP4PLong to correctly embed a new kernel in the code. Also commit includes some codestyle fixes.
This commit is contained in:
parent
66a076b819
commit
a2f9fa8e78
|
@ -24,26 +24,26 @@ const char *lj_tip4p=0;
|
|||
#include "lal_lj_tip4p_long.h"
|
||||
#include <cassert>
|
||||
using namespace LAMMPS_AL;
|
||||
#define LJ_TIP4PLong_T LJ_TIP4PLong<numtyp, acctyp>
|
||||
#define LJTIP4PLongT LJ_TIP4PLong<numtyp, acctyp>
|
||||
|
||||
extern Device<PRECISION,ACC_PRECISION> device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
LJ_TIP4PLong<numtyp, acctyp>::LJ_TIP4PLong(): BaseCharge<numtyp,acctyp>(), _allocated(false) {
|
||||
LJTIP4PLongT::LJ_TIP4PLong(): BaseCharge<numtyp,acctyp>(), _allocated(false) {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
LJ_TIP4PLong<numtyp, acctyp>::~LJ_TIP4PLong() {
|
||||
LJTIP4PLongT::~LJ_TIP4PLong() {
|
||||
clear();
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int LJ_TIP4PLong<numtyp, acctyp>::bytes_per_atom(const int max_nbors) const {
|
||||
int LJTIP4PLongT::bytes_per_atom(const int max_nbors) const {
|
||||
return this->bytes_per_atom_atomic(max_nbors);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int LJ_TIP4PLong<numtyp, acctyp>::init(const int ntypes,
|
||||
int LJTIP4PLongT::init(const int ntypes,
|
||||
double **host_cutsq, double **host_lj1,
|
||||
double **host_lj2, double **host_lj3,
|
||||
double **host_lj4, double **host_offset,
|
||||
|
@ -65,6 +65,7 @@ int LJ_TIP4PLong<numtyp, acctyp>::init(const int ntypes,
|
|||
if (success!=0)
|
||||
return success;
|
||||
k_pair_distrib.set_function(*this->pair_program,"k_lj_tip4p_long_distrib");
|
||||
k_pair_reneigh.set_function(*this->pair_program,"k_lj_tip4p_reneigh");
|
||||
|
||||
TypeH = tH;
|
||||
TypeO = tO;
|
||||
|
@ -143,7 +144,7 @@ int LJ_TIP4PLong<numtyp, acctyp>::init(const int ntypes,
|
|||
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void LJ_TIP4PLong<numtyp, acctyp>::clear() {
|
||||
void LJTIP4PLongT::clear() {
|
||||
if (!_allocated)
|
||||
return;
|
||||
_allocated=false;
|
||||
|
@ -161,12 +162,13 @@ void LJ_TIP4PLong<numtyp, acctyp>::clear() {
|
|||
//force_comp.clear();
|
||||
|
||||
k_pair_distrib.clear();
|
||||
k_pair_reneigh.clear();
|
||||
|
||||
this->clear_atomic();
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
double LJ_TIP4PLong<numtyp, acctyp>::host_memory_usage() const {
|
||||
double LJTIP4PLongT::host_memory_usage() const {
|
||||
return this->host_memory_usage_atomic()+sizeof(LJ_TIP4PLong<numtyp,acctyp>);
|
||||
}
|
||||
|
||||
|
@ -174,7 +176,7 @@ double LJ_TIP4PLong<numtyp, acctyp>::host_memory_usage() const {
|
|||
// Calculate energies, forces, and torques
|
||||
// ---------------------------------------------------------------------------
|
||||
template <class numtyp, class acctyp>
|
||||
void LJ_TIP4PLong<numtyp, acctyp>::loop(const bool _eflag, const bool _vflag) {
|
||||
void LJTIP4PLongT::loop(const bool _eflag, const bool _vflag) {
|
||||
// Compute the block size and grid size to keep all cores busy
|
||||
const int BX=this->block_size();
|
||||
int eflag, vflag;
|
||||
|
@ -188,13 +190,24 @@ void LJ_TIP4PLong<numtyp, acctyp>::loop(const bool _eflag, const bool _vflag) {
|
|||
else
|
||||
vflag=0;
|
||||
|
||||
int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
|
||||
(BX/this->_threads_per_atom)));
|
||||
|
||||
int ainum=this->ans->inum();
|
||||
const int nall = this->atom->nall();
|
||||
int nbor_pitch=this->nbor->nbor_pitch();
|
||||
this->time_pair.start();
|
||||
int GX;
|
||||
|
||||
if (t_ago == 0) {
|
||||
GX=static_cast<int>(ceil(static_cast<double>(nall)/BX));
|
||||
this->k_pair_reneigh.set_size(GX,BX);
|
||||
this->k_pair_reneigh.run(&this->atom->x,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&nall, &ainum,&nbor_pitch, &this->_threads_per_atom,
|
||||
&hneight, &m, &TypeO, &TypeH,
|
||||
&tag, &map_array, &atom_sametag);
|
||||
}
|
||||
|
||||
GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
|
||||
(BX/this->_threads_per_atom)));
|
||||
this->k_pair.set_size(GX,BX);
|
||||
if (vflag){
|
||||
this->ansO.resize_ib(ainum*3);
|
||||
|
@ -213,17 +226,16 @@ void LJ_TIP4PLong<numtyp, acctyp>::loop(const bool _eflag, const bool _vflag) {
|
|||
&atom_sametag, &this->ansO);
|
||||
GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/BX));
|
||||
this->k_pair_distrib.set_size(GX,BX);
|
||||
this->k_pair_distrib.run(&this->atom->x, &this->ans->force, &this->ans->engv, &eflag, &vflag,
|
||||
&ainum, &nbor_pitch, &this->_threads_per_atom,
|
||||
&hneight, &m, &TypeO, &TypeH, &alpha,
|
||||
&this->atom->q, &this->ansO);
|
||||
this->k_pair_distrib.run(&this->atom->x, &this->ans->force, &this->ans->engv,
|
||||
&eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom,
|
||||
&hneight, &m, &TypeO, &TypeH, &alpha,&this->atom->q, &this->ansO);
|
||||
this->time_pair.stop();
|
||||
}
|
||||
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void LJ_TIP4PLong<numtyp, acctyp>::copy_relations_data(int **hn, double **newsite, int n,
|
||||
int* tag, int *map_array, int map_size, int *sametag, int max_same, int ago){
|
||||
void LJTIP4PLongT::copy_relations_data(int n, int* tag, int *map_array,
|
||||
int map_size, int *sametag, int max_same, int ago){
|
||||
int nall = n;
|
||||
const int hn_sz = n*4; // matrix size = col size * col number
|
||||
hneight.resize_ib(hn_sz);
|
||||
|
@ -250,4 +262,118 @@ void LJ_TIP4PLong<numtyp, acctyp>::copy_relations_data(int **hn, double **newsit
|
|||
host_tag_write.clear();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Copy nbor list from host if necessary and then calculate forces, virials,..
|
||||
// ---------------------------------------------------------------------------
|
||||
template <class numtyp, class acctyp>
|
||||
void LJTIP4PLongT::compute(const int f_ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
int *ilist, int *numj, int **firstneigh,
|
||||
const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom,
|
||||
int &host_start, const double cpu_time,
|
||||
bool &success, double *host_q,
|
||||
const int nlocal, double *boxlo, double *prd) {
|
||||
this->acc_timers();
|
||||
if (inum_full==0) {
|
||||
host_start=0;
|
||||
// Make sure textures are correct if realloc by a different hybrid style
|
||||
this->resize_atom(0,nall,success);
|
||||
this->zero_timers();
|
||||
return;
|
||||
}
|
||||
|
||||
int ago=this->hd_balancer.ago_first(f_ago);
|
||||
int inum=this->hd_balancer.balance(ago,inum_full,cpu_time);
|
||||
this->ans->inum(inum);
|
||||
host_start=inum;
|
||||
|
||||
if (ago==0) {
|
||||
this->reset_nbors(nall, inum, ilist, numj, firstneigh, success);
|
||||
if (!success)
|
||||
return;
|
||||
}
|
||||
|
||||
this->atom->cast_x_data(host_x,host_type);
|
||||
this->atom->cast_q_data(host_q);
|
||||
this->hd_balancer.start_timer();
|
||||
this->atom->add_x_data(host_x,host_type);
|
||||
this->atom->add_q_data();
|
||||
|
||||
this->device->precompute(f_ago,nlocal,nall,host_x,host_type,success,host_q,
|
||||
boxlo, prd);
|
||||
|
||||
t_ago = ago;
|
||||
loop(eflag,vflag);
|
||||
this->ans->copy_answers(eflag,vflag,eatom,vatom,ilist);
|
||||
this->device->add_ans_object(this->ans);
|
||||
this->hd_balancer.stop_timer();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Reneighbor on GPU if necessary and then compute forces, virials, energies
|
||||
// ---------------------------------------------------------------------------
|
||||
template <class numtyp, class acctyp>
|
||||
int** LJTIP4PLongT::compute(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, tagint *tag,
|
||||
int *map_array, int map_size, int *sametag, int max_same,
|
||||
int **nspecial, tagint **special, const bool eflag,
|
||||
const bool vflag, const bool eatom,
|
||||
const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum,
|
||||
const double cpu_time, bool &success,
|
||||
double *host_q, double *boxlo, double *prd) {
|
||||
this->acc_timers();
|
||||
if (inum_full==0) {
|
||||
host_start=0;
|
||||
// Make sure textures are correct if realloc by a different hybrid style
|
||||
this->resize_atom(0,nall,success);
|
||||
this->zero_timers();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
this->hd_balancer.balance(cpu_time);
|
||||
int inum=this->hd_balancer.get_gpu_count(ago,inum_full);
|
||||
this->ans->inum(inum);
|
||||
host_start=inum;
|
||||
|
||||
// Build neighbor list on GPU if necessary
|
||||
if (ago==0) {
|
||||
this->build_nbor_list(inum, inum_full-inum, nall, host_x, host_type,
|
||||
sublo, subhi, tag, nspecial, special, success);
|
||||
if (!success)
|
||||
return NULL;
|
||||
this->atom->cast_q_data(host_q);
|
||||
this->hd_balancer.start_timer();
|
||||
} else {
|
||||
this->atom->cast_x_data(host_x,host_type);
|
||||
this->atom->cast_q_data(host_q);
|
||||
this->hd_balancer.start_timer();
|
||||
this->atom->add_x_data(host_x,host_type);
|
||||
}
|
||||
this->atom->add_q_data();
|
||||
*ilist=this->nbor->host_ilist.begin();
|
||||
*jnum=this->nbor->host_acc.begin();
|
||||
|
||||
copy_relations_data(nall, tag, map_array, map_size, sametag, max_same, ago);
|
||||
|
||||
this->device->precompute(ago,inum_full,nall,host_x,host_type,success,host_q,
|
||||
boxlo, prd);
|
||||
|
||||
t_ago = ago;
|
||||
loop(eflag,vflag);
|
||||
this->ans->copy_answers(eflag,vflag,eatom,vatom);
|
||||
this->device->add_ans_object(this->ans);
|
||||
this->hd_balancer.stop_timer();
|
||||
|
||||
return this->nbor->host_jlist.begin()-host_start;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
template class LJ_TIP4PLong<PRECISION,ACC_PRECISION>;
|
||||
|
|
|
@ -158,6 +158,62 @@ __kernel void k_lj_tip4p_long_distrib(const __global numtyp4 *restrict x_,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_lj_tip4p_reneigh(const __global numtyp4 *restrict x_,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const int nall, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom,
|
||||
__global int *restrict hneigh,
|
||||
__global numtyp4 *restrict m,
|
||||
const int typeO, const int typeH,
|
||||
const __global int *restrict tag, const __global int *restrict map,
|
||||
const __global int *restrict sametag) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
int i = BLOCK_ID_X*(BLOCK_SIZE_X)+tid;
|
||||
|
||||
if (i<nall) {
|
||||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||
|
||||
int iH1, iH2, iO;
|
||||
int itype = ix.w;
|
||||
if(itype == typeO) {
|
||||
iO = i;
|
||||
if (hneigh[i*4+2] != -1) {
|
||||
iH1 = atom_mapping(map, tag[i] + 1);
|
||||
iH2 = atom_mapping(map, tag[i] + 2);
|
||||
// set iH1,iH2 to closest image to O
|
||||
iH1 = closest_image(i, iH1, sametag, x_);
|
||||
iH2 = closest_image(i, iH2, sametag, x_);
|
||||
hneigh[i*4 ] = iH1;
|
||||
hneigh[i*4+1] = iH2;
|
||||
hneigh[i*4+2] = -1;
|
||||
}
|
||||
} else {
|
||||
if (hneigh[i*4+2] != -1) {
|
||||
int iI, iH;
|
||||
iI = atom_mapping(map,tag[i] - 1);
|
||||
numtyp4 iIx; fetch4(iIx,iI,pos_tex); //x_[iI];
|
||||
if ((int)iIx.w == typeH) {
|
||||
iO = atom_mapping(map,tag[i] - 2);
|
||||
iO = closest_image(i, iO, sametag, x_);
|
||||
iH1 = closest_image(i, iI, sametag, x_);
|
||||
iH2 = i;
|
||||
} else { //if ((int)iIx.w == typeO)
|
||||
iH = atom_mapping(map, tag[i] + 1);
|
||||
iO = closest_image(i,iI,sametag, x_);
|
||||
iH1 = i;
|
||||
iH2 = closest_image(i,iH,sametag, x_);
|
||||
}
|
||||
hneigh[i*4+0] = iO;
|
||||
hneigh[i*4+1] += -1;
|
||||
hneigh[i*4+2] = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
|
@ -211,63 +267,17 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
|||
|
||||
if(itype == typeO) {
|
||||
iO = i;
|
||||
if (hneigh[i*4+2] != -1) {
|
||||
iH1 = atom_mapping(map, tag[i] + 1);
|
||||
iH2 = atom_mapping(map, tag[i] + 2);
|
||||
// set iH1,iH2 to closest image to O
|
||||
iH1 = closest_image(i, iH1, sametag, x_);
|
||||
iH2 = closest_image(i, iH2, sametag, x_);
|
||||
hneigh[ i*4 ] = iH1;
|
||||
hneigh[ i*4+1] = iH2;
|
||||
hneigh[ i*4+2] = -1;
|
||||
hneigh[iH1*4 ] = i;
|
||||
hneigh[iH1*4+1] += -1;
|
||||
hneigh[iH1*4+2] = -1;
|
||||
hneigh[iH2*4 ] = i;
|
||||
hneigh[iH2*4+1] += -1;
|
||||
hneigh[iH2*4+2] = -1;
|
||||
} else {
|
||||
iH1 = hneigh[i*4 ];
|
||||
iH2 = hneigh[i*4+1];
|
||||
}
|
||||
if(fabs(m[iO].w) <= eq_zero) {
|
||||
compute_newsite(iO,iH1,iH2, &m[iO], alpha, x_);
|
||||
m[iO].w = qtmp;
|
||||
}
|
||||
x1 = m[iO];
|
||||
} else {
|
||||
if (hneigh[i*4+2] != -1) {
|
||||
int iI, iH;
|
||||
iI = atom_mapping(map,tag[i] - 1);
|
||||
numtyp4 iIx; fetch4(iIx,iI,pos_tex); //x_[iI];
|
||||
if ((int)iIx.w == typeH) {
|
||||
iO = atom_mapping(map,tag[i] - 2);
|
||||
iO = closest_image(i, iO, sametag, x_);
|
||||
iH1 = closest_image(i, iI, sametag, x_);
|
||||
iH2 = i;
|
||||
} else { //if ((int)iIx.w == typeO)
|
||||
iH = atom_mapping(map, tag[i] + 1);
|
||||
iO = closest_image(i,iI,sametag, x_);
|
||||
iH1 = i;
|
||||
iH2 = closest_image(i,iH,sametag, x_);
|
||||
}
|
||||
|
||||
hneigh[iH1*4+0] = iO;
|
||||
hneigh[iH1*4+1] += -1;
|
||||
hneigh[iH1*4+2] = -1;
|
||||
|
||||
hneigh[iH2*4+0] = iO;
|
||||
hneigh[iH2*4+1] += -1;
|
||||
hneigh[iH2*4+2] = -1;
|
||||
|
||||
hneigh[iO*4+0] = iH1;
|
||||
hneigh[iO*4+1] = iH2;
|
||||
hneigh[iO*4+2] = -1;
|
||||
} else {
|
||||
iO = hneigh[i *4 ];
|
||||
iH1 = hneigh[iO*4 ];
|
||||
iH2 = hneigh[iO*4+1];
|
||||
}
|
||||
if (iO >= inum) {
|
||||
non_local_oxy = 1;
|
||||
if(fabs(m[iO].w) <= eq_zero) {
|
||||
|
@ -327,25 +337,8 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
|||
if(itype == typeO || jtype == typeO) {
|
||||
if (jtype == typeO) {
|
||||
jO = j;
|
||||
if (hneigh[j*4+2] != -1) {
|
||||
jH1 = atom_mapping(map,tag[j] + 1);
|
||||
jH2 = atom_mapping(map,tag[j] + 2);
|
||||
// set iH1,iH2 to closest image to O
|
||||
jH1 = closest_image(j, jH1, sametag, x_);
|
||||
jH2 = closest_image(j, jH2, sametag, x_);
|
||||
hneigh[j*4 ] = jH1;
|
||||
hneigh[j*4+1] = jH2;
|
||||
hneigh[j*4+2] = -1;
|
||||
hneigh[jH1*4 ] = j;
|
||||
hneigh[jH1*4+1] += -1;
|
||||
hneigh[jH1*4+2] = -1;
|
||||
hneigh[jH2*4 ] = j;
|
||||
hneigh[jH2*4+1] += -1;
|
||||
hneigh[jH2*4+2] = -1;
|
||||
} else {
|
||||
jH1 = hneigh[j*4 ];
|
||||
jH2 = hneigh[j*4+1];
|
||||
}
|
||||
if (fabs(m[j].w) <= eq_zero) {
|
||||
compute_newsite(j, jH1, jH2, &m[j], alpha, x_);
|
||||
m[j].w = qj;
|
||||
|
@ -405,7 +398,7 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
|||
vdj.x = xjO.x*cO + xjH1.x*cH + xjH2.x*cH;
|
||||
vdj.y = xjO.y*cO + xjH1.y*cH + xjH2.y*cH;
|
||||
vdj.z = xjO.z*cO + xjH1.z*cH + xjH2.z*cH;
|
||||
vdj.w = vdj.w;
|
||||
//vdj.w = vdj.w;
|
||||
virial[0] += (ix.x - vdj.x)*fd.x;
|
||||
virial[1] += (ix.y - vdj.y)*fd.y;
|
||||
virial[2] += (ix.z - vdj.z)*fd.z;
|
||||
|
@ -422,7 +415,7 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
|||
vdi.x = xO.x*cO + xH1.x*cH + xH2.x*cH;
|
||||
vdi.y = xO.y*cO + xH1.y*cH + xH2.y*cH;
|
||||
vdi.z = xO.z*cO + xH1.z*cH + xH2.z*cH;
|
||||
vdi.w = vdi.w;
|
||||
//vdi.w = vdi.w;
|
||||
if (jtype != typeH){
|
||||
numtyp4 xjH1; fetch4(xjH1,jH1,pos_tex);
|
||||
numtyp4 xjH2; fetch4(xjH2,jH2,pos_tex);
|
||||
|
@ -430,7 +423,7 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
|||
vdj.x = xjO.x*cO + xjH1.x*cH + xjH2.x*cH;
|
||||
vdj.y = xjO.y*cO + xjH1.y*cH + xjH2.y*cH;
|
||||
vdj.z = xjO.z*cO + xjH1.z*cH + xjH2.z*cH;
|
||||
vdj.w = vdj.w;
|
||||
//vdj.w = vdj.w;
|
||||
} else vdj = jx;
|
||||
vO[0] += 0.5*(vdi.x - vdj.x)*fd.x;
|
||||
vO[1] += 0.5*(vdi.y - vdj.y)*fd.y;
|
||||
|
|
|
@ -62,10 +62,29 @@ public:
|
|||
/// Total host memory used by library for pair style
|
||||
double host_memory_usage() const;
|
||||
|
||||
/// Copy data which is easier to compute in LAMMPS_NS
|
||||
void copy_relations_data(int **hn, double **m, int n,int* tag, int *map_array, int map_size,
|
||||
/// Copy data from LAMMPS_NS
|
||||
void copy_relations_data(int n,int* tag, int *map_array, int map_size,
|
||||
int *sametag, int max_same, int ago);
|
||||
|
||||
/// Reimplement BaseCharge pair loop with host neighboring
|
||||
void compute(const int f_ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, int *ilist, int *numj,
|
||||
int **firstneigh, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
const double cpu_time, bool &success, double *charge,
|
||||
const int nlocal, double *boxlo, double *prd);
|
||||
|
||||
/// Reimplement BaseCharge pair loop with device neighboring
|
||||
int** compute(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, tagint *tag,int *map_array, int map_size, int *sametag, int max_same,
|
||||
int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **numj, const double cpu_time, bool &success,
|
||||
double *charge, double *boxlo, double *prd);
|
||||
|
||||
|
||||
// --------------------------- TYPE DATA --------------------------
|
||||
|
||||
/// lj1.x = lj1, lj1.y = lj2, lj1.z = cutsq_vdw
|
||||
|
@ -77,7 +96,6 @@ public:
|
|||
/// Special LJ values [0-3] and Special Coul values [4-7]
|
||||
UCL_D_Vec<numtyp> sp_lj;
|
||||
|
||||
/// If atom type constants fit in shared memory, use fast kernels
|
||||
bool shared_types;
|
||||
|
||||
/// Number of atom types
|
||||
|
@ -98,10 +116,11 @@ public:
|
|||
UCL_D_Vec<int> map_array;
|
||||
UCL_D_Vec<int> atom_sametag;
|
||||
|
||||
UCL_Kernel k_pair_distrib;
|
||||
UCL_Kernel k_pair_distrib, k_pair_reneigh;
|
||||
|
||||
private:
|
||||
bool _allocated;
|
||||
int t_ago;
|
||||
void loop(const bool _eflag, const bool _vflag);
|
||||
};
|
||||
|
||||
|
|
|
@ -112,14 +112,18 @@ void ljtip4p_long_gpu_clear() {
|
|||
|
||||
int ** ljtip4p_long_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
double *sublo, double *subhi,
|
||||
tagint *tag, int *map_array, int map_size,
|
||||
int *sametag, int max_same,
|
||||
int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double *boxlo,
|
||||
double *prd) {
|
||||
return LJTIP4PLMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
|
||||
subhi, tag, nspecial, special, eflag, vflag, eatom,
|
||||
subhi, tag, map_array, map_size, sametag, max_same,
|
||||
nspecial, special, eflag, vflag, eatom,
|
||||
vatom, host_start, ilist, jnum, cpu_time, success,
|
||||
host_q,boxlo, prd);
|
||||
}
|
||||
|
@ -139,10 +143,10 @@ double ljtip4p_long_gpu_bytes() {
|
|||
return LJTIP4PLMF.host_memory_usage();
|
||||
}
|
||||
|
||||
void ljtip4p_long_copy_molecule_data(int **hn, double **m, int n, int* tag,
|
||||
void ljtip4p_long_copy_molecule_data(int n, int* tag,
|
||||
int *map_array, int map_size,
|
||||
int *sametag, int max_same, int ago){
|
||||
LJTIP4PLMF.copy_relations_data(hn, m, n, tag,map_array,map_size,sametag, max_same, ago);
|
||||
LJTIP4PLMF.copy_relations_data(n, tag, map_array, map_size, sametag, max_same, ago);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -64,7 +64,10 @@ int ljtip4p_long_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
|||
void ljtip4p_long_gpu_clear();
|
||||
int ** ljtip4p_long_gpu_compute_n(const int ago, const int inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
double *sublo, double *subhi,
|
||||
tagint *tag, int *map_array, int map_size,
|
||||
int *sametag, int max_same,
|
||||
int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum,
|
||||
|
@ -78,7 +81,7 @@ void ljtip4p_long_gpu_compute(const int ago, const int inum, const int nall,
|
|||
bool &success, double *host_q, const int nlocal,
|
||||
double *boxlo, double *prd);
|
||||
double ljtip4p_long_gpu_bytes();
|
||||
void ljtip4p_long_copy_molecule_data(int **, double **, int, int* , int *,
|
||||
void ljtip4p_long_copy_molecule_data(int, int* , int *,
|
||||
int, int *, int , int);
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -112,17 +115,16 @@ void PairLJCutTIP4PLongGPU::compute(int eflag, int vflag)
|
|||
int nall = atom->nlocal + atom->nghost;
|
||||
int inum, host_start;
|
||||
|
||||
ljtip4p_long_copy_molecule_data(hneigh, newsite, nall, atom->tag,
|
||||
atom->get_map_array(), atom->get_map_size(),
|
||||
atom->sametag, atom->get_max_same(), neighbor->ago);
|
||||
|
||||
bool success = true;
|
||||
int *ilist, *numneigh, **firstneigh;
|
||||
if (gpu_mode != GPU_FORCE) {
|
||||
inum = atom->nlocal;
|
||||
firstneigh = ljtip4p_long_gpu_compute_n(neighbor->ago, inum, nall,
|
||||
atom->x, atom->type, domain->sublo,
|
||||
domain->subhi, atom->tag, atom->nspecial,
|
||||
domain->subhi,
|
||||
atom->tag, atom->get_map_array(), atom->get_map_size(),
|
||||
atom->sametag, atom->get_max_same(),
|
||||
atom->nspecial,
|
||||
atom->special, eflag, vflag, eflag_atom,
|
||||
vflag_atom, host_start, &ilist, &numneigh,
|
||||
cpu_time, success, atom->q, domain->boxlo,
|
||||
|
@ -132,6 +134,9 @@ void PairLJCutTIP4PLongGPU::compute(int eflag, int vflag)
|
|||
ilist = list->ilist;
|
||||
numneigh = list->numneigh;
|
||||
firstneigh = list->firstneigh;
|
||||
ljtip4p_long_copy_molecule_data(nall, atom->tag,
|
||||
atom->get_map_array(), atom->get_map_size(),
|
||||
atom->sametag, atom->get_max_same(), neighbor->ago);
|
||||
ljtip4p_long_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
|
||||
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
|
||||
vflag_atom, host_start, cpu_time, success, atom->q,
|
||||
|
|
Loading…
Reference in New Issue