Merge pull request #362 from ibaned/warnings2

fix Kokkos+kspace warnings
This commit is contained in:
sjplimp 2017-02-03 14:43:12 -07:00 committed by GitHub
commit dc34a32602
4 changed files with 52 additions and 60 deletions

View File

@ -45,7 +45,9 @@ class PairCoulLongKokkos : public PairCoulLong {
double init_one(int, int); double init_one(int, int);
struct params_coul{ struct params_coul{
KOKKOS_INLINE_FUNCTION
params_coul(){cut_coulsq=0;}; params_coul(){cut_coulsq=0;};
KOKKOS_INLINE_FUNCTION
params_coul(int i){cut_coulsq=0;}; params_coul(int i){cut_coulsq=0;};
F_FLOAT cut_coulsq; F_FLOAT cut_coulsq;
}; };

View File

@ -374,7 +374,6 @@ void PPPMKokkos<DeviceType>::setup()
error->all(FLERR,"Incorrect boundaries with slab PPPM"); error->all(FLERR,"Incorrect boundaries with slab PPPM");
} }
int i,j,k,n;
double *prd; double *prd;
// volume-dependent factors // volume-dependent factors
@ -417,10 +416,6 @@ void PPPMKokkos<DeviceType>::setup()
DeviceType::fence(); DeviceType::fence();
copymode = 0; copymode = 0;
// virial coefficients
double sqk,vterm;
// merge three outer loops into one for better threading // merge three outer loops into one for better threading
numz_fft = nzhi_fft-nzlo_fft + 1; numz_fft = nzhi_fft-nzlo_fft + 1;
@ -615,7 +610,7 @@ void PPPMKokkos<DeviceType>::setup_grid()
template<class DeviceType> template<class DeviceType>
void PPPMKokkos<DeviceType>::compute(int eflag, int vflag) void PPPMKokkos<DeviceType>::compute(int eflag, int vflag)
{ {
int i,j; int i;
// set energy/virial flags // set energy/virial flags
// invoke allocate_peratom() if needed for first time // invoke allocate_peratom() if needed for first time
@ -974,7 +969,6 @@ void PPPMKokkos<DeviceType>::set_grid_global()
// fluid-occupied volume used to estimate real-space error // fluid-occupied volume used to estimate real-space error
// zprd used rather than zprd_slab // zprd used rather than zprd_slab
double h;
bigint natoms = atomKK->natoms; bigint natoms = atomKK->natoms;
if (!gewaldflag) { if (!gewaldflag) {
@ -1636,9 +1630,6 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_particle_map, const int &i) cons
template<class DeviceType> template<class DeviceType>
void PPPMKokkos<DeviceType>::make_rho() void PPPMKokkos<DeviceType>::make_rho()
{ {
int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
// clear 3d density array // clear 3d density array
//memset(&(density_brick(nzlo_out,nylo_out,nxlo_out)),0, //memset(&(density_brick(nzlo_out,nylo_out,nxlo_out)),0,
@ -1845,8 +1836,7 @@ void PPPMKokkos<DeviceType>::poisson()
template<class DeviceType> template<class DeviceType>
void PPPMKokkos<DeviceType>::poisson_ik() void PPPMKokkos<DeviceType>::poisson_ik()
{ {
int i,j,k,n; int j;
double eng;
// transform charge density (r -> k) // transform charge density (r -> k)
@ -1877,7 +1867,6 @@ void PPPMKokkos<DeviceType>::poisson_ik()
for (j = 0; j < 6; j++) virial[j] += ev.v[j]; for (j = 0; j < 6; j++) virial[j] += ev.v[j];
energy += ev.ecoul; energy += ev.ecoul;
} else { } else {
n = 0;
copymode = 1; copymode = 1;
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik3>(0,nfft),*this,ev); Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik3>(0,nfft),*this,ev);
DeviceType::fence(); DeviceType::fence();
@ -2214,8 +2203,6 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_poisson_ik_triclinic6, const int
template<class DeviceType> template<class DeviceType>
void PPPMKokkos<DeviceType>::poisson_peratom() void PPPMKokkos<DeviceType>::poisson_peratom()
{ {
int i,j,k,n;
// merge three outer loops into one for better threading // merge three outer loops into one for better threading
numz_inout = (nzhi_in-nzlo_out)-(nzlo_in-nzlo_out) + 1; numz_inout = (nzhi_in-nzlo_out)-(nzlo_in-nzlo_out) + 1;
@ -2567,22 +2554,17 @@ KOKKOS_INLINE_FUNCTION
void PPPMKokkos<DeviceType>::operator()(TagPPPM_fieldforce_ik, const int &i) const void PPPMKokkos<DeviceType>::operator()(TagPPPM_fieldforce_ik, const int &i) const
{ {
int l,m,n,nx,ny,nz,mx,my,mz; int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0; FFT_SCALAR x0,y0,z0;
FFT_SCALAR ekx,eky,ekz; FFT_SCALAR ekx,eky,ekz;
nx = d_part2grid(i,0); nx = d_part2grid(i,0);
ny = d_part2grid(i,1); ny = d_part2grid(i,1);
nz = d_part2grid(i,2); nz = d_part2grid(i,2);
dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv;
dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv;
dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv;
nz -= nzlo_out; nz -= nzlo_out;
ny -= nylo_out; ny -= nylo_out;
nx -= nxlo_out; nx -= nxlo_out;
//compute_rho1d(i,dx,dy,dz); // hasn't changed from make_rho
ekx = eky = ekz = ZEROF; ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) { for (n = nlower; n <= nupper; n++) {
mz = n+nz; mz = n+nz;
@ -2842,8 +2824,8 @@ void PPPMKokkos<DeviceType>::unpack_reverse_kokkos(int flag, Kokkos::DualView<FF
d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL());
d_buf = k_buf.view<DeviceType>(); d_buf = k_buf.view<DeviceType>();
int nx = (nxhi_out-nxlo_out+1); nx = (nxhi_out-nxlo_out+1);
int ny = (nyhi_out-nylo_out+1); ny = (nyhi_out-nylo_out+1);
copymode = 1; copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_unpack_reverse>(0,nlist),*this); Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_unpack_reverse>(0,nlist),*this);
@ -3012,7 +2994,6 @@ void PPPMKokkos<DeviceType>::slabcorr()
// sum local contributions to get global dipole moment // sum local contributions to get global dipole moment
dipole_all;
MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
// need to make non-neutral systems and/or // need to make non-neutral systems and/or

View File

@ -67,7 +67,10 @@
void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
{ {
int i,total,length,offset,num; int i,total,length,offset,num;
FFT_SCALAR norm, *out_ptr; FFT_SCALAR norm;
#if defined(FFT_FFTW3)
FFT_SCALAR *out_ptr;
#endif
FFT_DATA *data,*copy; FFT_DATA *data,*copy;
// system specific constants // system specific constants
@ -296,7 +299,9 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
if (flag == 1 && plan->scaled) { if (flag == 1 && plan->scaled) {
norm = plan->norm; norm = plan->norm;
num = plan->normnum; num = plan->normnum;
#if defined(FFT_FFTW3)
out_ptr = (FFT_SCALAR *)out; out_ptr = (FFT_SCALAR *)out;
#endif
for (i = 0; i < num; i++) { for (i = 0; i < num; i++) {
#if defined(FFT_FFTW3) #if defined(FFT_FFTW3)
*(out_ptr++) *= norm; *(out_ptr++) *= norm;
@ -361,13 +366,12 @@ struct fft_plan_3d *fft_3d_create_plan(
{ {
struct fft_plan_3d *plan; struct fft_plan_3d *plan;
int me,nprocs; int me,nprocs;
int i,num,flag,remapflag,fftflag; int flag,remapflag;
int first_ilo,first_ihi,first_jlo,first_jhi,first_klo,first_khi; int first_ilo,first_ihi,first_jlo,first_jhi,first_klo,first_khi;
int second_ilo,second_ihi,second_jlo,second_jhi,second_klo,second_khi; int second_ilo,second_ihi,second_jlo,second_jhi,second_klo,second_khi;
int third_ilo,third_ihi,third_jlo,third_jhi,third_klo,third_khi; int third_ilo,third_ihi,third_jlo,third_jhi,third_klo,third_khi;
int out_size,first_size,second_size,third_size,copy_size,scratch_size; int out_size,first_size,second_size,third_size,copy_size,scratch_size;
int np1,np2,ip1,ip2; int np1,np2,ip1,ip2;
int list[50];
// system specific variables // system specific variables
@ -690,6 +694,9 @@ struct fft_plan_3d *fft_3d_create_plan(
flag = 0; flag = 0;
int i,num,fftflag;
int list[50];
num = 0; num = 0;
factor(nfast,&num,list); factor(nfast,&num,list);
for (i = 0; i < num; i++) for (i = 0; i < num; i++)
@ -1086,8 +1093,11 @@ void bifactor(int n, int *factor1, int *factor2)
void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan) void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan)
{ {
int i,total,length,offset,num; int i,num;
FFT_SCALAR norm, *data_ptr; FFT_SCALAR norm;
#if defined(FFT_FFTW3)
FFT_SCALAR *data_ptr;
#endif
// system specific constants // system specific constants
@ -1132,20 +1142,20 @@ void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan)
// data is just an array of 0.0 // data is just an array of 0.0
#ifdef FFT_SGI #ifdef FFT_SGI
for (offset = 0; offset < total1; offset += length1) for (int offset = 0; offset < total1; offset += length1)
FFT_1D(flag,length1,&data[offset],1,plan->coeff1); FFT_1D(flag,length1,&data[offset],1,plan->coeff1);
for (offset = 0; offset < total2; offset += length2) for (int offset = 0; offset < total2; offset += length2)
FFT_1D(flag,length2,&data[offset],1,plan->coeff2); FFT_1D(flag,length2,&data[offset],1,plan->coeff2);
for (offset = 0; offset < total3; offset += length3) for (int offset = 0; offset < total3; offset += length3)
FFT_1D(flag,length3,&data[offset],1,plan->coeff3); FFT_1D(flag,length3,&data[offset],1,plan->coeff3);
#elif defined(FFT_SCSL) #elif defined(FFT_SCSL)
for (offset = 0; offset < total1; offset += length1) for (int offset = 0; offset < total1; offset += length1)
FFT_1D(flag,length1,scalef,&data[offset],&data[offset],plan->coeff1, FFT_1D(flag,length1,scalef,&data[offset],&data[offset],plan->coeff1,
plan->work1,&isys); plan->work1,&isys);
for (offset = 0; offset < total2; offset += length2) for (int offset = 0; offset < total2; offset += length2)
FFT_1D(flag,length2,scalef,&data[offset],&data[offset],plan->coeff2, FFT_1D(flag,length2,scalef,&data[offset],&data[offset],plan->coeff2,
plan->work2,&isys); plan->work2,&isys);
for (offset = 0; offset < total3; offset += length3) for (int offset = 0; offset < total3; offset += length3)
FFT_1D(flag,length3,scalef,&data[offset],&data[offset],plan->coeff3, FFT_1D(flag,length3,scalef,&data[offset],&data[offset],plan->coeff3,
plan->work3,&isys); plan->work3,&isys);
#elif defined(FFT_ACML) #elif defined(FFT_ACML)
@ -1157,11 +1167,11 @@ void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan)
num=total3/length3; num=total3/length3;
FFT_1D(&flag,&num,&length3,data,plan->coeff3,&info); FFT_1D(&flag,&num,&length3,data,plan->coeff3,&info);
#elif defined(FFT_INTEL) #elif defined(FFT_INTEL)
for (offset = 0; offset < total1; offset += length1) for (int offset = 0; offset < total1; offset += length1)
FFT_1D(&data[offset],&length1,&flag,plan->coeff1); FFT_1D(&data[offset],&length1,&flag,plan->coeff1);
for (offset = 0; offset < total2; offset += length2) for (int offset = 0; offset < total2; offset += length2)
FFT_1D(&data[offset],&length2,&flag,plan->coeff2); FFT_1D(&data[offset],&length2,&flag,plan->coeff2);
for (offset = 0; offset < total3; offset += length3) for (int offset = 0; offset < total3; offset += length3)
FFT_1D(&data[offset],&length3,&flag,plan->coeff3); FFT_1D(&data[offset],&length3,&flag,plan->coeff3);
#elif defined(FFT_MKL) #elif defined(FFT_MKL)
if (flag == -1) { if (flag == -1) {
@ -1175,28 +1185,28 @@ void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan)
} }
#elif defined(FFT_DEC) #elif defined(FFT_DEC)
if (flag == -1) { if (flag == -1) {
for (offset = 0; offset < total1; offset += length1) for (int offset = 0; offset < total1; offset += length1)
FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length1,&one); FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length1,&one);
for (offset = 0; offset < total2; offset += length2) for (int offset = 0; offset < total2; offset += length2)
FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length2,&one); FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length2,&one);
for (offset = 0; offset < total3; offset += length3) for (int offset = 0; offset < total3; offset += length3)
FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length3,&one); FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length3,&one);
} else { } else {
for (offset = 0; offset < total1; offset += length1) for (int offset = 0; offset < total1; offset += length1)
FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length1,&one); FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length1,&one);
for (offset = 0; offset < total2; offset += length2) for (int offset = 0; offset < total2; offset += length2)
FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length2,&one); FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length2,&one);
for (offset = 0; offset < total3; offset += length3) for (int offset = 0; offset < total3; offset += length3)
FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length3,&one); FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length3,&one);
} }
#elif defined(FFT_T3E) #elif defined(FFT_T3E)
for (offset = 0; offset < total1; offset += length1) for (int offset = 0; offset < total1; offset += length1)
FFT_1D(&flag,&length1,&scalef,&data[offset],&data[offset],plan->coeff1, FFT_1D(&flag,&length1,&scalef,&data[offset],&data[offset],plan->coeff1,
plan->work1,&isys); plan->work1,&isys);
for (offset = 0; offset < total2; offset += length2) for (int offset = 0; offset < total2; offset += length2)
FFT_1D(&flag,&length2,&scalef,&data[offset],&data[offset],plan->coeff2, FFT_1D(&flag,&length2,&scalef,&data[offset],&data[offset],plan->coeff2,
plan->work2,&isys); plan->work2,&isys);
for (offset = 0; offset < total3; offset += length3) for (int offset = 0; offset < total3; offset += length3)
FFT_1D(&flag,&length3,&scalef,&data[offset],&data[offset],plan->coeff3, FFT_1D(&flag,&length3,&scalef,&data[offset],&data[offset],plan->coeff3,
plan->work3,&isys); plan->work3,&isys);
#elif defined(FFT_FFTW2) #elif defined(FFT_FFTW2)
@ -1228,18 +1238,18 @@ void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan)
FFTW_API(execute_dft)(theplan,data,data); FFTW_API(execute_dft)(theplan,data,data);
#else #else
if (flag == -1) { if (flag == -1) {
for (offset = 0; offset < total1; offset += length1) for (int offset = 0; offset < total1; offset += length1)
kiss_fft(plan->cfg_fast_forward,&data[offset],&data[offset]); kiss_fft(plan->cfg_fast_forward,&data[offset],&data[offset]);
for (offset = 0; offset < total2; offset += length2) for (int offset = 0; offset < total2; offset += length2)
kiss_fft(plan->cfg_mid_forward,&data[offset],&data[offset]); kiss_fft(plan->cfg_mid_forward,&data[offset],&data[offset]);
for (offset = 0; offset < total3; offset += length3) for (int offset = 0; offset < total3; offset += length3)
kiss_fft(plan->cfg_slow_forward,&data[offset],&data[offset]); kiss_fft(plan->cfg_slow_forward,&data[offset],&data[offset]);
} else { } else {
for (offset = 0; offset < total1; offset += length1) for (int offset = 0; offset < total1; offset += length1)
kiss_fft(plan->cfg_fast_backward,&data[offset],&data[offset]); kiss_fft(plan->cfg_fast_backward,&data[offset],&data[offset]);
for (offset = 0; offset < total2; offset += length2) for (int offset = 0; offset < total2; offset += length2)
kiss_fft(plan->cfg_mid_backward,&data[offset],&data[offset]); kiss_fft(plan->cfg_mid_backward,&data[offset],&data[offset]);
for (offset = 0; offset < total3; offset += length3) for (int offset = 0; offset < total3; offset += length3)
kiss_fft(plan->cfg_slow_backward,&data[offset],&data[offset]); kiss_fft(plan->cfg_slow_backward,&data[offset],&data[offset]);
} }
#endif #endif
@ -1251,7 +1261,9 @@ void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan)
if (flag == 1 && plan->scaled) { if (flag == 1 && plan->scaled) {
norm = plan->norm; norm = plan->norm;
num = MIN(plan->normnum,nsize); num = MIN(plan->normnum,nsize);
#if defined(FFT_FFTW3)
data_ptr = (FFT_SCALAR *)data; data_ptr = (FFT_SCALAR *)data;
#endif
for (i = 0; i < num; i++) { for (i = 0; i < num; i++) {
#if defined(FFT_FFTW3) #if defined(FFT_FFTW3)
*(data_ptr++) *= norm; *(data_ptr++) *= norm;

View File

@ -230,7 +230,7 @@ void FixCMAP::min_setup(int vflag)
void FixCMAP::pre_neighbor() void FixCMAP::pre_neighbor()
{ {
int i,m,itype,atom1,atom2,atom3,atom4,atom5; int i,m,atom1,atom2,atom3,atom4,atom5;
// guesstimate initial length of local crossterm list // guesstimate initial length of local crossterm list
// if ncmap was not set (due to read_restart, no read_data), // if ncmap was not set (due to read_restart, no read_data),
@ -842,7 +842,7 @@ void FixCMAP::set_map_derivatives(double **map, double **d1yo, double **d2yo,
// use the bicubic spline to calculate the derivatives // use the bicubic spline to calculate the derivatives
int i, j, k, ii, jj, xm, p; int i, j, k, ii, jj, xm, p;
double phi, psi, y, d1y, d2y, d12y, tyyk,tdyk; double phi, psi, d1y, d2y, d12y, tyyk,tdyk;
double *tmp_y, *tmp_dy, *tmp_ddy, **tmap, **tddmap; double *tmp_y, *tmp_dy, *tmp_ddy, **tmap, **tddmap;
int ix; int ix;
double a,b,a1,b1,a2,b2; double a,b,a1,b1,a2,b2;
@ -850,7 +850,6 @@ void FixCMAP::set_map_derivatives(double **map, double **d1yo, double **d2yo,
xm = CMAPDIM/2; xm = CMAPDIM/2;
p = CMAPDIM; p = CMAPDIM;
y = 0.;
d1y = 0.; d1y = 0.;
d2y = 0.; d2y = 0.;
d12y = 0.; d12y = 0.;
@ -907,8 +906,6 @@ void FixCMAP::set_map_derivatives(double **map, double **d1yo, double **d2yo,
b1 = b*b*b-b; b1 = b*b*b-b;
a2 = 3.0*a*a-1.0; a2 = 3.0*a*a-1.0;
b2 = 3.0*b*b-1.0; b2 = 3.0*b*b-1.0;
y = a*tmp_y[ix]+b*tmp_y[ix+1]+
(a1*tmp_ddy[ix]+b1*tmp_ddy[ix+1])*(CMAPDX*CMAPDX)/6.0;
d1y = (tmp_y[ix+1]-tmp_y[ix])/CMAPDX- d1y = (tmp_y[ix+1]-tmp_y[ix])/CMAPDX-
a2/6.0*CMAPDX*tmp_ddy[ix]+b2/6.0*CMAPDX*tmp_ddy[ix+1]; a2/6.0*CMAPDX*tmp_ddy[ix]+b2/6.0*CMAPDX*tmp_ddy[ix+1];
spline(tmp_dy,tmp_ddy,CMAPDIM+xm+xm); spline(tmp_dy,tmp_ddy,CMAPDIM+xm+xm);
@ -1015,8 +1012,8 @@ void FixCMAP::bc_interpol(double x1, double x2, int low1, int low2, double *gs,
// gradients and cross-derivatives // gradients and cross-derivatives
// calculate the interpolated value of the point of interest (POI) // calculate the interpolated value of the point of interest (POI)
int i, p=12; int i;
double t, u, fac, gs1l, gs2l, gs1u, gs2u; double t, u, gs1l, gs2l;
// set the interpolation coefficients // set the interpolation coefficients