forked from lijiext/lammps
Separate the computation of hneigh into another kernel
Simplify the main GPU kernel and add another kernel 'k_pair_reneigh'. It works good on GTX1070 (Pascal), but still there is a problem with non-deterministic results on Volta. I reimplement BaseCharge::compute methods in the child class LJ_TIP4PLong to correctly embed a new kernel in the code. Also commit includes some codestyle fixes.
This commit is contained in:
parent
66a076b819
commit
a2f9fa8e78
|
@ -24,26 +24,26 @@ const char *lj_tip4p=0;
|
||||||
#include "lal_lj_tip4p_long.h"
|
#include "lal_lj_tip4p_long.h"
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
using namespace LAMMPS_AL;
|
using namespace LAMMPS_AL;
|
||||||
#define LJ_TIP4PLong_T LJ_TIP4PLong<numtyp, acctyp>
|
#define LJTIP4PLongT LJ_TIP4PLong<numtyp, acctyp>
|
||||||
|
|
||||||
extern Device<PRECISION,ACC_PRECISION> device;
|
extern Device<PRECISION,ACC_PRECISION> device;
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
LJ_TIP4PLong<numtyp, acctyp>::LJ_TIP4PLong(): BaseCharge<numtyp,acctyp>(), _allocated(false) {
|
LJTIP4PLongT::LJ_TIP4PLong(): BaseCharge<numtyp,acctyp>(), _allocated(false) {
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
LJ_TIP4PLong<numtyp, acctyp>::~LJ_TIP4PLong() {
|
LJTIP4PLongT::~LJ_TIP4PLong() {
|
||||||
clear();
|
clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int LJ_TIP4PLong<numtyp, acctyp>::bytes_per_atom(const int max_nbors) const {
|
int LJTIP4PLongT::bytes_per_atom(const int max_nbors) const {
|
||||||
return this->bytes_per_atom_atomic(max_nbors);
|
return this->bytes_per_atom_atomic(max_nbors);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int LJ_TIP4PLong<numtyp, acctyp>::init(const int ntypes,
|
int LJTIP4PLongT::init(const int ntypes,
|
||||||
double **host_cutsq, double **host_lj1,
|
double **host_cutsq, double **host_lj1,
|
||||||
double **host_lj2, double **host_lj3,
|
double **host_lj2, double **host_lj3,
|
||||||
double **host_lj4, double **host_offset,
|
double **host_lj4, double **host_offset,
|
||||||
|
@ -65,6 +65,7 @@ int LJ_TIP4PLong<numtyp, acctyp>::init(const int ntypes,
|
||||||
if (success!=0)
|
if (success!=0)
|
||||||
return success;
|
return success;
|
||||||
k_pair_distrib.set_function(*this->pair_program,"k_lj_tip4p_long_distrib");
|
k_pair_distrib.set_function(*this->pair_program,"k_lj_tip4p_long_distrib");
|
||||||
|
k_pair_reneigh.set_function(*this->pair_program,"k_lj_tip4p_reneigh");
|
||||||
|
|
||||||
TypeH = tH;
|
TypeH = tH;
|
||||||
TypeO = tO;
|
TypeO = tO;
|
||||||
|
@ -143,7 +144,7 @@ int LJ_TIP4PLong<numtyp, acctyp>::init(const int ntypes,
|
||||||
|
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void LJ_TIP4PLong<numtyp, acctyp>::clear() {
|
void LJTIP4PLongT::clear() {
|
||||||
if (!_allocated)
|
if (!_allocated)
|
||||||
return;
|
return;
|
||||||
_allocated=false;
|
_allocated=false;
|
||||||
|
@ -161,12 +162,13 @@ void LJ_TIP4PLong<numtyp, acctyp>::clear() {
|
||||||
//force_comp.clear();
|
//force_comp.clear();
|
||||||
|
|
||||||
k_pair_distrib.clear();
|
k_pair_distrib.clear();
|
||||||
|
k_pair_reneigh.clear();
|
||||||
|
|
||||||
this->clear_atomic();
|
this->clear_atomic();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
double LJ_TIP4PLong<numtyp, acctyp>::host_memory_usage() const {
|
double LJTIP4PLongT::host_memory_usage() const {
|
||||||
return this->host_memory_usage_atomic()+sizeof(LJ_TIP4PLong<numtyp,acctyp>);
|
return this->host_memory_usage_atomic()+sizeof(LJ_TIP4PLong<numtyp,acctyp>);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -174,7 +176,7 @@ double LJ_TIP4PLong<numtyp, acctyp>::host_memory_usage() const {
|
||||||
// Calculate energies, forces, and torques
|
// Calculate energies, forces, and torques
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void LJ_TIP4PLong<numtyp, acctyp>::loop(const bool _eflag, const bool _vflag) {
|
void LJTIP4PLongT::loop(const bool _eflag, const bool _vflag) {
|
||||||
// Compute the block size and grid size to keep all cores busy
|
// Compute the block size and grid size to keep all cores busy
|
||||||
const int BX=this->block_size();
|
const int BX=this->block_size();
|
||||||
int eflag, vflag;
|
int eflag, vflag;
|
||||||
|
@ -188,13 +190,24 @@ void LJ_TIP4PLong<numtyp, acctyp>::loop(const bool _eflag, const bool _vflag) {
|
||||||
else
|
else
|
||||||
vflag=0;
|
vflag=0;
|
||||||
|
|
||||||
int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
|
|
||||||
(BX/this->_threads_per_atom)));
|
|
||||||
|
|
||||||
int ainum=this->ans->inum();
|
int ainum=this->ans->inum();
|
||||||
|
const int nall = this->atom->nall();
|
||||||
int nbor_pitch=this->nbor->nbor_pitch();
|
int nbor_pitch=this->nbor->nbor_pitch();
|
||||||
this->time_pair.start();
|
this->time_pair.start();
|
||||||
|
int GX;
|
||||||
|
|
||||||
|
if (t_ago == 0) {
|
||||||
|
GX=static_cast<int>(ceil(static_cast<double>(nall)/BX));
|
||||||
|
this->k_pair_reneigh.set_size(GX,BX);
|
||||||
|
this->k_pair_reneigh.run(&this->atom->x,
|
||||||
|
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||||
|
&nall, &ainum,&nbor_pitch, &this->_threads_per_atom,
|
||||||
|
&hneight, &m, &TypeO, &TypeH,
|
||||||
|
&tag, &map_array, &atom_sametag);
|
||||||
|
}
|
||||||
|
|
||||||
|
GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
|
||||||
|
(BX/this->_threads_per_atom)));
|
||||||
this->k_pair.set_size(GX,BX);
|
this->k_pair.set_size(GX,BX);
|
||||||
if (vflag){
|
if (vflag){
|
||||||
this->ansO.resize_ib(ainum*3);
|
this->ansO.resize_ib(ainum*3);
|
||||||
|
@ -213,17 +226,16 @@ void LJ_TIP4PLong<numtyp, acctyp>::loop(const bool _eflag, const bool _vflag) {
|
||||||
&atom_sametag, &this->ansO);
|
&atom_sametag, &this->ansO);
|
||||||
GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/BX));
|
GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/BX));
|
||||||
this->k_pair_distrib.set_size(GX,BX);
|
this->k_pair_distrib.set_size(GX,BX);
|
||||||
this->k_pair_distrib.run(&this->atom->x, &this->ans->force, &this->ans->engv, &eflag, &vflag,
|
this->k_pair_distrib.run(&this->atom->x, &this->ans->force, &this->ans->engv,
|
||||||
&ainum, &nbor_pitch, &this->_threads_per_atom,
|
&eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom,
|
||||||
&hneight, &m, &TypeO, &TypeH, &alpha,
|
&hneight, &m, &TypeO, &TypeH, &alpha,&this->atom->q, &this->ansO);
|
||||||
&this->atom->q, &this->ansO);
|
|
||||||
this->time_pair.stop();
|
this->time_pair.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void LJ_TIP4PLong<numtyp, acctyp>::copy_relations_data(int **hn, double **newsite, int n,
|
void LJTIP4PLongT::copy_relations_data(int n, int* tag, int *map_array,
|
||||||
int* tag, int *map_array, int map_size, int *sametag, int max_same, int ago){
|
int map_size, int *sametag, int max_same, int ago){
|
||||||
int nall = n;
|
int nall = n;
|
||||||
const int hn_sz = n*4; // matrix size = col size * col number
|
const int hn_sz = n*4; // matrix size = col size * col number
|
||||||
hneight.resize_ib(hn_sz);
|
hneight.resize_ib(hn_sz);
|
||||||
|
@ -250,4 +262,118 @@ void LJ_TIP4PLong<numtyp, acctyp>::copy_relations_data(int **hn, double **newsit
|
||||||
host_tag_write.clear();
|
host_tag_write.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Copy nbor list from host if necessary and then calculate forces, virials,..
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
template <class numtyp, class acctyp>
|
||||||
|
void LJTIP4PLongT::compute(const int f_ago, const int inum_full,
|
||||||
|
const int nall, double **host_x, int *host_type,
|
||||||
|
int *ilist, int *numj, int **firstneigh,
|
||||||
|
const bool eflag, const bool vflag,
|
||||||
|
const bool eatom, const bool vatom,
|
||||||
|
int &host_start, const double cpu_time,
|
||||||
|
bool &success, double *host_q,
|
||||||
|
const int nlocal, double *boxlo, double *prd) {
|
||||||
|
this->acc_timers();
|
||||||
|
if (inum_full==0) {
|
||||||
|
host_start=0;
|
||||||
|
// Make sure textures are correct if realloc by a different hybrid style
|
||||||
|
this->resize_atom(0,nall,success);
|
||||||
|
this->zero_timers();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ago=this->hd_balancer.ago_first(f_ago);
|
||||||
|
int inum=this->hd_balancer.balance(ago,inum_full,cpu_time);
|
||||||
|
this->ans->inum(inum);
|
||||||
|
host_start=inum;
|
||||||
|
|
||||||
|
if (ago==0) {
|
||||||
|
this->reset_nbors(nall, inum, ilist, numj, firstneigh, success);
|
||||||
|
if (!success)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
this->atom->cast_x_data(host_x,host_type);
|
||||||
|
this->atom->cast_q_data(host_q);
|
||||||
|
this->hd_balancer.start_timer();
|
||||||
|
this->atom->add_x_data(host_x,host_type);
|
||||||
|
this->atom->add_q_data();
|
||||||
|
|
||||||
|
this->device->precompute(f_ago,nlocal,nall,host_x,host_type,success,host_q,
|
||||||
|
boxlo, prd);
|
||||||
|
|
||||||
|
t_ago = ago;
|
||||||
|
loop(eflag,vflag);
|
||||||
|
this->ans->copy_answers(eflag,vflag,eatom,vatom,ilist);
|
||||||
|
this->device->add_ans_object(this->ans);
|
||||||
|
this->hd_balancer.stop_timer();
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Reneighbor on GPU if necessary and then compute forces, virials, energies
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
template <class numtyp, class acctyp>
|
||||||
|
int** LJTIP4PLongT::compute(const int ago, const int inum_full,
|
||||||
|
const int nall, double **host_x, int *host_type,
|
||||||
|
double *sublo, double *subhi, tagint *tag,
|
||||||
|
int *map_array, int map_size, int *sametag, int max_same,
|
||||||
|
int **nspecial, tagint **special, const bool eflag,
|
||||||
|
const bool vflag, const bool eatom,
|
||||||
|
const bool vatom, int &host_start,
|
||||||
|
int **ilist, int **jnum,
|
||||||
|
const double cpu_time, bool &success,
|
||||||
|
double *host_q, double *boxlo, double *prd) {
|
||||||
|
this->acc_timers();
|
||||||
|
if (inum_full==0) {
|
||||||
|
host_start=0;
|
||||||
|
// Make sure textures are correct if realloc by a different hybrid style
|
||||||
|
this->resize_atom(0,nall,success);
|
||||||
|
this->zero_timers();
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
this->hd_balancer.balance(cpu_time);
|
||||||
|
int inum=this->hd_balancer.get_gpu_count(ago,inum_full);
|
||||||
|
this->ans->inum(inum);
|
||||||
|
host_start=inum;
|
||||||
|
|
||||||
|
// Build neighbor list on GPU if necessary
|
||||||
|
if (ago==0) {
|
||||||
|
this->build_nbor_list(inum, inum_full-inum, nall, host_x, host_type,
|
||||||
|
sublo, subhi, tag, nspecial, special, success);
|
||||||
|
if (!success)
|
||||||
|
return NULL;
|
||||||
|
this->atom->cast_q_data(host_q);
|
||||||
|
this->hd_balancer.start_timer();
|
||||||
|
} else {
|
||||||
|
this->atom->cast_x_data(host_x,host_type);
|
||||||
|
this->atom->cast_q_data(host_q);
|
||||||
|
this->hd_balancer.start_timer();
|
||||||
|
this->atom->add_x_data(host_x,host_type);
|
||||||
|
}
|
||||||
|
this->atom->add_q_data();
|
||||||
|
*ilist=this->nbor->host_ilist.begin();
|
||||||
|
*jnum=this->nbor->host_acc.begin();
|
||||||
|
|
||||||
|
copy_relations_data(nall, tag, map_array, map_size, sametag, max_same, ago);
|
||||||
|
|
||||||
|
this->device->precompute(ago,inum_full,nall,host_x,host_type,success,host_q,
|
||||||
|
boxlo, prd);
|
||||||
|
|
||||||
|
t_ago = ago;
|
||||||
|
loop(eflag,vflag);
|
||||||
|
this->ans->copy_answers(eflag,vflag,eatom,vatom);
|
||||||
|
this->device->add_ans_object(this->ans);
|
||||||
|
this->hd_balancer.stop_timer();
|
||||||
|
|
||||||
|
return this->nbor->host_jlist.begin()-host_start;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template class LJ_TIP4PLong<PRECISION,ACC_PRECISION>;
|
template class LJ_TIP4PLong<PRECISION,ACC_PRECISION>;
|
||||||
|
|
|
@ -158,6 +158,62 @@ __kernel void k_lj_tip4p_long_distrib(const __global numtyp4 *restrict x_,
|
||||||
} // if ii
|
} // if ii
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__kernel void k_lj_tip4p_reneigh(const __global numtyp4 *restrict x_,
|
||||||
|
const __global int * dev_nbor,
|
||||||
|
const __global int * dev_packed,
|
||||||
|
const int nall, const int inum,
|
||||||
|
const int nbor_pitch, const int t_per_atom,
|
||||||
|
__global int *restrict hneigh,
|
||||||
|
__global numtyp4 *restrict m,
|
||||||
|
const int typeO, const int typeH,
|
||||||
|
const __global int *restrict tag, const __global int *restrict map,
|
||||||
|
const __global int *restrict sametag) {
|
||||||
|
int tid, ii, offset;
|
||||||
|
atom_info(t_per_atom,ii,tid,offset);
|
||||||
|
int i = BLOCK_ID_X*(BLOCK_SIZE_X)+tid;
|
||||||
|
|
||||||
|
if (i<nall) {
|
||||||
|
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||||
|
|
||||||
|
int iH1, iH2, iO;
|
||||||
|
int itype = ix.w;
|
||||||
|
if(itype == typeO) {
|
||||||
|
iO = i;
|
||||||
|
if (hneigh[i*4+2] != -1) {
|
||||||
|
iH1 = atom_mapping(map, tag[i] + 1);
|
||||||
|
iH2 = atom_mapping(map, tag[i] + 2);
|
||||||
|
// set iH1,iH2 to closest image to O
|
||||||
|
iH1 = closest_image(i, iH1, sametag, x_);
|
||||||
|
iH2 = closest_image(i, iH2, sametag, x_);
|
||||||
|
hneigh[i*4 ] = iH1;
|
||||||
|
hneigh[i*4+1] = iH2;
|
||||||
|
hneigh[i*4+2] = -1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (hneigh[i*4+2] != -1) {
|
||||||
|
int iI, iH;
|
||||||
|
iI = atom_mapping(map,tag[i] - 1);
|
||||||
|
numtyp4 iIx; fetch4(iIx,iI,pos_tex); //x_[iI];
|
||||||
|
if ((int)iIx.w == typeH) {
|
||||||
|
iO = atom_mapping(map,tag[i] - 2);
|
||||||
|
iO = closest_image(i, iO, sametag, x_);
|
||||||
|
iH1 = closest_image(i, iI, sametag, x_);
|
||||||
|
iH2 = i;
|
||||||
|
} else { //if ((int)iIx.w == typeO)
|
||||||
|
iH = atom_mapping(map, tag[i] + 1);
|
||||||
|
iO = closest_image(i,iI,sametag, x_);
|
||||||
|
iH1 = i;
|
||||||
|
iH2 = closest_image(i,iH,sametag, x_);
|
||||||
|
}
|
||||||
|
hneigh[i*4+0] = iO;
|
||||||
|
hneigh[i*4+1] += -1;
|
||||||
|
hneigh[i*4+2] = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
__kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
__kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp4 *restrict lj1,
|
const __global numtyp4 *restrict lj1,
|
||||||
const __global numtyp4 *restrict lj3,
|
const __global numtyp4 *restrict lj3,
|
||||||
|
@ -211,63 +267,17 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
||||||
|
|
||||||
if(itype == typeO) {
|
if(itype == typeO) {
|
||||||
iO = i;
|
iO = i;
|
||||||
if (hneigh[i*4+2] != -1) {
|
iH1 = hneigh[i*4 ];
|
||||||
iH1 = atom_mapping(map, tag[i] + 1);
|
iH2 = hneigh[i*4+1];
|
||||||
iH2 = atom_mapping(map, tag[i] + 2);
|
|
||||||
// set iH1,iH2 to closest image to O
|
|
||||||
iH1 = closest_image(i, iH1, sametag, x_);
|
|
||||||
iH2 = closest_image(i, iH2, sametag, x_);
|
|
||||||
hneigh[ i*4 ] = iH1;
|
|
||||||
hneigh[ i*4+1] = iH2;
|
|
||||||
hneigh[ i*4+2] = -1;
|
|
||||||
hneigh[iH1*4 ] = i;
|
|
||||||
hneigh[iH1*4+1] += -1;
|
|
||||||
hneigh[iH1*4+2] = -1;
|
|
||||||
hneigh[iH2*4 ] = i;
|
|
||||||
hneigh[iH2*4+1] += -1;
|
|
||||||
hneigh[iH2*4+2] = -1;
|
|
||||||
} else {
|
|
||||||
iH1 = hneigh[i*4 ];
|
|
||||||
iH2 = hneigh[i*4+1];
|
|
||||||
}
|
|
||||||
if(fabs(m[iO].w) <= eq_zero) {
|
if(fabs(m[iO].w) <= eq_zero) {
|
||||||
compute_newsite(iO,iH1,iH2, &m[iO], alpha, x_);
|
compute_newsite(iO,iH1,iH2, &m[iO], alpha, x_);
|
||||||
m[iO].w = qtmp;
|
m[iO].w = qtmp;
|
||||||
}
|
}
|
||||||
x1 = m[iO];
|
x1 = m[iO];
|
||||||
} else {
|
} else {
|
||||||
if (hneigh[i*4+2] != -1) {
|
iO = hneigh[i *4 ];
|
||||||
int iI, iH;
|
iH1 = hneigh[iO*4 ];
|
||||||
iI = atom_mapping(map,tag[i] - 1);
|
iH2 = hneigh[iO*4+1];
|
||||||
numtyp4 iIx; fetch4(iIx,iI,pos_tex); //x_[iI];
|
|
||||||
if ((int)iIx.w == typeH) {
|
|
||||||
iO = atom_mapping(map,tag[i] - 2);
|
|
||||||
iO = closest_image(i, iO, sametag, x_);
|
|
||||||
iH1 = closest_image(i, iI, sametag, x_);
|
|
||||||
iH2 = i;
|
|
||||||
} else { //if ((int)iIx.w == typeO)
|
|
||||||
iH = atom_mapping(map, tag[i] + 1);
|
|
||||||
iO = closest_image(i,iI,sametag, x_);
|
|
||||||
iH1 = i;
|
|
||||||
iH2 = closest_image(i,iH,sametag, x_);
|
|
||||||
}
|
|
||||||
|
|
||||||
hneigh[iH1*4+0] = iO;
|
|
||||||
hneigh[iH1*4+1] += -1;
|
|
||||||
hneigh[iH1*4+2] = -1;
|
|
||||||
|
|
||||||
hneigh[iH2*4+0] = iO;
|
|
||||||
hneigh[iH2*4+1] += -1;
|
|
||||||
hneigh[iH2*4+2] = -1;
|
|
||||||
|
|
||||||
hneigh[iO*4+0] = iH1;
|
|
||||||
hneigh[iO*4+1] = iH2;
|
|
||||||
hneigh[iO*4+2] = -1;
|
|
||||||
} else {
|
|
||||||
iO = hneigh[i *4 ];
|
|
||||||
iH1 = hneigh[iO*4 ];
|
|
||||||
iH2 = hneigh[iO*4+1];
|
|
||||||
}
|
|
||||||
if (iO >= inum) {
|
if (iO >= inum) {
|
||||||
non_local_oxy = 1;
|
non_local_oxy = 1;
|
||||||
if(fabs(m[iO].w) <= eq_zero) {
|
if(fabs(m[iO].w) <= eq_zero) {
|
||||||
|
@ -327,25 +337,8 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
||||||
if(itype == typeO || jtype == typeO) {
|
if(itype == typeO || jtype == typeO) {
|
||||||
if (jtype == typeO) {
|
if (jtype == typeO) {
|
||||||
jO = j;
|
jO = j;
|
||||||
if (hneigh[j*4+2] != -1) {
|
jH1 = hneigh[j*4 ];
|
||||||
jH1 = atom_mapping(map,tag[j] + 1);
|
jH2 = hneigh[j*4+1];
|
||||||
jH2 = atom_mapping(map,tag[j] + 2);
|
|
||||||
// set iH1,iH2 to closest image to O
|
|
||||||
jH1 = closest_image(j, jH1, sametag, x_);
|
|
||||||
jH2 = closest_image(j, jH2, sametag, x_);
|
|
||||||
hneigh[j*4 ] = jH1;
|
|
||||||
hneigh[j*4+1] = jH2;
|
|
||||||
hneigh[j*4+2] = -1;
|
|
||||||
hneigh[jH1*4 ] = j;
|
|
||||||
hneigh[jH1*4+1] += -1;
|
|
||||||
hneigh[jH1*4+2] = -1;
|
|
||||||
hneigh[jH2*4 ] = j;
|
|
||||||
hneigh[jH2*4+1] += -1;
|
|
||||||
hneigh[jH2*4+2] = -1;
|
|
||||||
} else {
|
|
||||||
jH1 = hneigh[j*4 ];
|
|
||||||
jH2 = hneigh[j*4+1];
|
|
||||||
}
|
|
||||||
if (fabs(m[j].w) <= eq_zero) {
|
if (fabs(m[j].w) <= eq_zero) {
|
||||||
compute_newsite(j, jH1, jH2, &m[j], alpha, x_);
|
compute_newsite(j, jH1, jH2, &m[j], alpha, x_);
|
||||||
m[j].w = qj;
|
m[j].w = qj;
|
||||||
|
@ -405,7 +398,7 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
||||||
vdj.x = xjO.x*cO + xjH1.x*cH + xjH2.x*cH;
|
vdj.x = xjO.x*cO + xjH1.x*cH + xjH2.x*cH;
|
||||||
vdj.y = xjO.y*cO + xjH1.y*cH + xjH2.y*cH;
|
vdj.y = xjO.y*cO + xjH1.y*cH + xjH2.y*cH;
|
||||||
vdj.z = xjO.z*cO + xjH1.z*cH + xjH2.z*cH;
|
vdj.z = xjO.z*cO + xjH1.z*cH + xjH2.z*cH;
|
||||||
vdj.w = vdj.w;
|
//vdj.w = vdj.w;
|
||||||
virial[0] += (ix.x - vdj.x)*fd.x;
|
virial[0] += (ix.x - vdj.x)*fd.x;
|
||||||
virial[1] += (ix.y - vdj.y)*fd.y;
|
virial[1] += (ix.y - vdj.y)*fd.y;
|
||||||
virial[2] += (ix.z - vdj.z)*fd.z;
|
virial[2] += (ix.z - vdj.z)*fd.z;
|
||||||
|
@ -422,7 +415,7 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
||||||
vdi.x = xO.x*cO + xH1.x*cH + xH2.x*cH;
|
vdi.x = xO.x*cO + xH1.x*cH + xH2.x*cH;
|
||||||
vdi.y = xO.y*cO + xH1.y*cH + xH2.y*cH;
|
vdi.y = xO.y*cO + xH1.y*cH + xH2.y*cH;
|
||||||
vdi.z = xO.z*cO + xH1.z*cH + xH2.z*cH;
|
vdi.z = xO.z*cO + xH1.z*cH + xH2.z*cH;
|
||||||
vdi.w = vdi.w;
|
//vdi.w = vdi.w;
|
||||||
if (jtype != typeH){
|
if (jtype != typeH){
|
||||||
numtyp4 xjH1; fetch4(xjH1,jH1,pos_tex);
|
numtyp4 xjH1; fetch4(xjH1,jH1,pos_tex);
|
||||||
numtyp4 xjH2; fetch4(xjH2,jH2,pos_tex);
|
numtyp4 xjH2; fetch4(xjH2,jH2,pos_tex);
|
||||||
|
@ -430,7 +423,7 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_,
|
||||||
vdj.x = xjO.x*cO + xjH1.x*cH + xjH2.x*cH;
|
vdj.x = xjO.x*cO + xjH1.x*cH + xjH2.x*cH;
|
||||||
vdj.y = xjO.y*cO + xjH1.y*cH + xjH2.y*cH;
|
vdj.y = xjO.y*cO + xjH1.y*cH + xjH2.y*cH;
|
||||||
vdj.z = xjO.z*cO + xjH1.z*cH + xjH2.z*cH;
|
vdj.z = xjO.z*cO + xjH1.z*cH + xjH2.z*cH;
|
||||||
vdj.w = vdj.w;
|
//vdj.w = vdj.w;
|
||||||
} else vdj = jx;
|
} else vdj = jx;
|
||||||
vO[0] += 0.5*(vdi.x - vdj.x)*fd.x;
|
vO[0] += 0.5*(vdi.x - vdj.x)*fd.x;
|
||||||
vO[1] += 0.5*(vdi.y - vdj.y)*fd.y;
|
vO[1] += 0.5*(vdi.y - vdj.y)*fd.y;
|
||||||
|
|
|
@ -62,10 +62,29 @@ public:
|
||||||
/// Total host memory used by library for pair style
|
/// Total host memory used by library for pair style
|
||||||
double host_memory_usage() const;
|
double host_memory_usage() const;
|
||||||
|
|
||||||
/// Copy data which is easier to compute in LAMMPS_NS
|
/// Copy data from LAMMPS_NS
|
||||||
void copy_relations_data(int **hn, double **m, int n,int* tag, int *map_array, int map_size,
|
void copy_relations_data(int n,int* tag, int *map_array, int map_size,
|
||||||
int *sametag, int max_same, int ago);
|
int *sametag, int max_same, int ago);
|
||||||
|
|
||||||
|
/// Reimplement BaseCharge pair loop with host neighboring
|
||||||
|
void compute(const int f_ago, const int inum_full, const int nall,
|
||||||
|
double **host_x, int *host_type, int *ilist, int *numj,
|
||||||
|
int **firstneigh, const bool eflag, const bool vflag,
|
||||||
|
const bool eatom, const bool vatom, int &host_start,
|
||||||
|
const double cpu_time, bool &success, double *charge,
|
||||||
|
const int nlocal, double *boxlo, double *prd);
|
||||||
|
|
||||||
|
/// Reimplement BaseCharge pair loop with device neighboring
|
||||||
|
int** compute(const int ago, const int inum_full, const int nall,
|
||||||
|
double **host_x, int *host_type, double *sublo,
|
||||||
|
double *subhi, tagint *tag,int *map_array, int map_size, int *sametag, int max_same,
|
||||||
|
int **nspecial,
|
||||||
|
tagint **special, const bool eflag, const bool vflag,
|
||||||
|
const bool eatom, const bool vatom, int &host_start,
|
||||||
|
int **ilist, int **numj, const double cpu_time, bool &success,
|
||||||
|
double *charge, double *boxlo, double *prd);
|
||||||
|
|
||||||
|
|
||||||
// --------------------------- TYPE DATA --------------------------
|
// --------------------------- TYPE DATA --------------------------
|
||||||
|
|
||||||
/// lj1.x = lj1, lj1.y = lj2, lj1.z = cutsq_vdw
|
/// lj1.x = lj1, lj1.y = lj2, lj1.z = cutsq_vdw
|
||||||
|
@ -77,7 +96,6 @@ public:
|
||||||
/// Special LJ values [0-3] and Special Coul values [4-7]
|
/// Special LJ values [0-3] and Special Coul values [4-7]
|
||||||
UCL_D_Vec<numtyp> sp_lj;
|
UCL_D_Vec<numtyp> sp_lj;
|
||||||
|
|
||||||
/// If atom type constants fit in shared memory, use fast kernels
|
|
||||||
bool shared_types;
|
bool shared_types;
|
||||||
|
|
||||||
/// Number of atom types
|
/// Number of atom types
|
||||||
|
@ -98,10 +116,11 @@ public:
|
||||||
UCL_D_Vec<int> map_array;
|
UCL_D_Vec<int> map_array;
|
||||||
UCL_D_Vec<int> atom_sametag;
|
UCL_D_Vec<int> atom_sametag;
|
||||||
|
|
||||||
UCL_Kernel k_pair_distrib;
|
UCL_Kernel k_pair_distrib, k_pair_reneigh;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool _allocated;
|
bool _allocated;
|
||||||
|
int t_ago;
|
||||||
void loop(const bool _eflag, const bool _vflag);
|
void loop(const bool _eflag, const bool _vflag);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -112,14 +112,18 @@ void ljtip4p_long_gpu_clear() {
|
||||||
|
|
||||||
int ** ljtip4p_long_gpu_compute_n(const int ago, const int inum_full,
|
int ** ljtip4p_long_gpu_compute_n(const int ago, const int inum_full,
|
||||||
const int nall, double **host_x, int *host_type,
|
const int nall, double **host_x, int *host_type,
|
||||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
double *sublo, double *subhi,
|
||||||
|
tagint *tag, int *map_array, int map_size,
|
||||||
|
int *sametag, int max_same,
|
||||||
|
int **nspecial,
|
||||||
tagint **special, const bool eflag, const bool vflag,
|
tagint **special, const bool eflag, const bool vflag,
|
||||||
const bool eatom, const bool vatom, int &host_start,
|
const bool eatom, const bool vatom, int &host_start,
|
||||||
int **ilist, int **jnum, const double cpu_time,
|
int **ilist, int **jnum, const double cpu_time,
|
||||||
bool &success, double *host_q, double *boxlo,
|
bool &success, double *host_q, double *boxlo,
|
||||||
double *prd) {
|
double *prd) {
|
||||||
return LJTIP4PLMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
|
return LJTIP4PLMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
|
||||||
subhi, tag, nspecial, special, eflag, vflag, eatom,
|
subhi, tag, map_array, map_size, sametag, max_same,
|
||||||
|
nspecial, special, eflag, vflag, eatom,
|
||||||
vatom, host_start, ilist, jnum, cpu_time, success,
|
vatom, host_start, ilist, jnum, cpu_time, success,
|
||||||
host_q,boxlo, prd);
|
host_q,boxlo, prd);
|
||||||
}
|
}
|
||||||
|
@ -139,10 +143,10 @@ double ljtip4p_long_gpu_bytes() {
|
||||||
return LJTIP4PLMF.host_memory_usage();
|
return LJTIP4PLMF.host_memory_usage();
|
||||||
}
|
}
|
||||||
|
|
||||||
void ljtip4p_long_copy_molecule_data(int **hn, double **m, int n, int* tag,
|
void ljtip4p_long_copy_molecule_data(int n, int* tag,
|
||||||
int *map_array, int map_size,
|
int *map_array, int map_size,
|
||||||
int *sametag, int max_same, int ago){
|
int *sametag, int max_same, int ago){
|
||||||
LJTIP4PLMF.copy_relations_data(hn, m, n, tag,map_array,map_size,sametag, max_same, ago);
|
LJTIP4PLMF.copy_relations_data(n, tag, map_array, map_size, sametag, max_same, ago);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -64,7 +64,10 @@ int ljtip4p_long_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||||
void ljtip4p_long_gpu_clear();
|
void ljtip4p_long_gpu_clear();
|
||||||
int ** ljtip4p_long_gpu_compute_n(const int ago, const int inum,
|
int ** ljtip4p_long_gpu_compute_n(const int ago, const int inum,
|
||||||
const int nall, double **host_x, int *host_type,
|
const int nall, double **host_x, int *host_type,
|
||||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
double *sublo, double *subhi,
|
||||||
|
tagint *tag, int *map_array, int map_size,
|
||||||
|
int *sametag, int max_same,
|
||||||
|
int **nspecial,
|
||||||
tagint **special, const bool eflag, const bool vflag,
|
tagint **special, const bool eflag, const bool vflag,
|
||||||
const bool eatom, const bool vatom, int &host_start,
|
const bool eatom, const bool vatom, int &host_start,
|
||||||
int **ilist, int **jnum,
|
int **ilist, int **jnum,
|
||||||
|
@ -78,7 +81,7 @@ void ljtip4p_long_gpu_compute(const int ago, const int inum, const int nall,
|
||||||
bool &success, double *host_q, const int nlocal,
|
bool &success, double *host_q, const int nlocal,
|
||||||
double *boxlo, double *prd);
|
double *boxlo, double *prd);
|
||||||
double ljtip4p_long_gpu_bytes();
|
double ljtip4p_long_gpu_bytes();
|
||||||
void ljtip4p_long_copy_molecule_data(int **, double **, int, int* , int *,
|
void ljtip4p_long_copy_molecule_data(int, int* , int *,
|
||||||
int, int *, int , int);
|
int, int *, int , int);
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
@ -112,17 +115,16 @@ void PairLJCutTIP4PLongGPU::compute(int eflag, int vflag)
|
||||||
int nall = atom->nlocal + atom->nghost;
|
int nall = atom->nlocal + atom->nghost;
|
||||||
int inum, host_start;
|
int inum, host_start;
|
||||||
|
|
||||||
ljtip4p_long_copy_molecule_data(hneigh, newsite, nall, atom->tag,
|
|
||||||
atom->get_map_array(), atom->get_map_size(),
|
|
||||||
atom->sametag, atom->get_max_same(), neighbor->ago);
|
|
||||||
|
|
||||||
bool success = true;
|
bool success = true;
|
||||||
int *ilist, *numneigh, **firstneigh;
|
int *ilist, *numneigh, **firstneigh;
|
||||||
if (gpu_mode != GPU_FORCE) {
|
if (gpu_mode != GPU_FORCE) {
|
||||||
inum = atom->nlocal;
|
inum = atom->nlocal;
|
||||||
firstneigh = ljtip4p_long_gpu_compute_n(neighbor->ago, inum, nall,
|
firstneigh = ljtip4p_long_gpu_compute_n(neighbor->ago, inum, nall,
|
||||||
atom->x, atom->type, domain->sublo,
|
atom->x, atom->type, domain->sublo,
|
||||||
domain->subhi, atom->tag, atom->nspecial,
|
domain->subhi,
|
||||||
|
atom->tag, atom->get_map_array(), atom->get_map_size(),
|
||||||
|
atom->sametag, atom->get_max_same(),
|
||||||
|
atom->nspecial,
|
||||||
atom->special, eflag, vflag, eflag_atom,
|
atom->special, eflag, vflag, eflag_atom,
|
||||||
vflag_atom, host_start, &ilist, &numneigh,
|
vflag_atom, host_start, &ilist, &numneigh,
|
||||||
cpu_time, success, atom->q, domain->boxlo,
|
cpu_time, success, atom->q, domain->boxlo,
|
||||||
|
@ -132,6 +134,9 @@ void PairLJCutTIP4PLongGPU::compute(int eflag, int vflag)
|
||||||
ilist = list->ilist;
|
ilist = list->ilist;
|
||||||
numneigh = list->numneigh;
|
numneigh = list->numneigh;
|
||||||
firstneigh = list->firstneigh;
|
firstneigh = list->firstneigh;
|
||||||
|
ljtip4p_long_copy_molecule_data(nall, atom->tag,
|
||||||
|
atom->get_map_array(), atom->get_map_size(),
|
||||||
|
atom->sametag, atom->get_max_same(), neighbor->ago);
|
||||||
ljtip4p_long_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
|
ljtip4p_long_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
|
||||||
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
|
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
|
||||||
vflag_atom, host_start, cpu_time, success, atom->q,
|
vflag_atom, host_start, cpu_time, success, atom->q,
|
||||||
|
|
Loading…
Reference in New Issue