git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6261 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp 2011-05-31 21:09:14 +00:00
parent 3692e8c68b
commit 2ea09945bf
121 changed files with 22597 additions and 16 deletions

View File

@ -4,31 +4,189 @@
# Install branch: taken when the first script argument equals 1.
# NOTE(review): this listing appears to be a rendered diff hunk in which removed and
# added lines are shown together without +/- markers (two variants of the
# Makefile.package editing follow each other, and the first 'if' below has no
# matching 'fi' in view) - confirm against the real script before relying on it.
if (test $1 = 1) then
# First variant: only edits ../Makefile.package when it already exists.
if (test -e ../Makefile.package) then
# Drop a previously inserted include of lib/cuda/Makefile.common and the exact
# flag strings written by an earlier install, so they are not duplicated.
sed -i -e '/include ..\/..\/lib\/cuda\/Makefile.common/d' ../Makefile.package
sed -i -e 's/-llammpscuda -lcuda -lcudart -lrt //' ../Makefile.package
sed -i -e 's/-I..\/..\/lib\/cuda -I$(CUDA_INSTALL_PATH)\/include //' ../Makefile.package
sed -i -e 's/-L..\/..\/lib\/cuda -L$(CUDA_INSTALL_PATH)\/lib64 -L$(CUDA_INSTALL_PATH)\/lib $(USRLIB_CONDITIONAL) -DLMP_USER_CUDA //' ../Makefile.package
# Insert the include before line 1, then append the CUDA flags right after the
# 'PKG_INC =', 'PKG_PATH =' and 'PKG_LIB =' prefixes ('&' re-emits the match).
sed -i '1 i include ..\/..\/lib\/cuda\/Makefile.common' ../Makefile.package
sed -i -e 's|^PKG_INC =[ \t]*|&-I..\/..\/lib\/cuda -I$(CUDA_INSTALL_PATH)\/include |' ../Makefile.package
sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/cuda -L$(CUDA_INSTALL_PATH)\/lib64 -L$(CUDA_INSTALL_PATH)\/lib $(USRLIB_CONDITIONAL) |' ../Makefile.package
sed -i -e 's|^PKG_LIB =[ \t]*|&-llammpscuda -lcuda -lcudart -lrt |' ../Makefile.package
# Second variant: create ../Makefile.package from the .empty template when missing.
if (test ! -e ../Makefile.package) then
cp ../Makefile.package.empty ../Makefile.package
fi
# Remove every line that starts with 'include' and mentions cuda, then every
# blank-delimited word (followed by a space) containing 'cuda', 'CUDA' or 'lrt'.
sed -i -e '/^include.*cuda.*$/d' ../Makefile.package
sed -i -e 's/[^ \t]*cuda[^ \t]* //g' ../Makefile.package
sed -i -e 's/[^ \t]*CUDA[^ \t]* //g' ../Makefile.package
sed -i -e 's/[^ \t]*lrt[^ \t]* //g' ../Makefile.package
# Insert the include before line 4 and append the flags to the package variables;
# here the CUDA toolkit paths/libraries go to the PKG_SYS* variables.
sed -i '4 i include ..\/..\/lib\/cuda\/Makefile.common' ../Makefile.package
sed -i -e 's|^PKG_INC =[ \t]*|&-I..\/..\/lib\/cuda -DLMP_USER_CUDA |' ../Makefile.package
sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/cuda |' ../Makefile.package
sed -i -e 's|^PKG_LIB =[ \t]*|&-llammpscuda |' ../Makefile.package
sed -i -e 's|^PKG_SYSINC =[ \t]*|&-I$(CUDA_INSTALL_PATH)\/include |' ../Makefile.package
sed -i -e 's|^PKG_SYSPATH =[ \t]*|&-L$(CUDA_INSTALL_PATH)\/lib64 -L$(CUDA_INSTALL_PATH)\/lib $(CUDA_USRLIB_CONDITIONAL) |' ../Makefile.package
sed -i -e 's|^PKG_SYSLIB =[ \t]*|&-lcuda -lcudart -lrt |' ../Makefile.package
# Conditional copies: each group of *_cuda sources is copied to the parent
# directory only when the named non-CUDA source file already exists there.
if (test -e ../atom_vec_angle.cpp) then
cp atom_vec_angle_cuda.cpp ..
cp atom_vec_angle_cuda.h ..
fi
if (test -e ../atom_vec_full.cpp) then
cp atom_vec_full_cuda.cpp ..
cp atom_vec_full_cuda.h ..
fi
if (test -e ../fix_freeze.cpp) then
cp fix_freeze_cuda.cpp ..
cp fix_freeze_cuda.h ..
fi
if (test -e ../pair_born_coul_long.cpp) then
cp pair_born_coul_long_cuda.cpp ..
cp pair_born_coul_long_cuda.h ..
fi
if (test -e ../pair_buck_coul_long.cpp) then
cp pair_buck_coul_long_cuda.cpp ..
cp pair_buck_coul_long_cuda.h ..
fi
if (test -e ../pair_cg_cmm.cpp) then
cp pair_cg_cmm_cuda.cpp ..
cp pair_cg_cmm_coul_cut_cuda.cpp ..
cp pair_cg_cmm_coul_debye_cuda.cpp ..
cp pair_cg_cmm_cuda.h ..
cp pair_cg_cmm_coul_cut_cuda.h ..
cp pair_cg_cmm_coul_debye_cuda.h ..
fi
if (test -e ../pair_cg_cmm_coul_long.cpp) then
cp pair_cg_cmm_coul_long_cuda.cpp ..
cp pair_cg_cmm_coul_long_cuda.h ..
fi
# The pppm.cpp test also gates the fft3d wrappers and pair_lj_cut_coul_long.
if (test -e ../pppm.cpp) then
cp pppm_cuda.cpp ..
cp fft3d_cuda.cpp ..
cp fft3d_wrap_cuda.cpp ..
cp pppm_cuda.h ..
cp fft3d_cuda.h ..
cp fft3d_wrap_cuda.h ..
cp pair_lj_cut_coul_long_cuda.cpp ..
cp pair_lj_cut_coul_long_cuda.h ..
fi
if (test -e ../pair_eam.cpp) then
cp pair_eam_alloy_cuda.cpp ..
cp pair_eam_cuda.cpp ..
cp pair_eam_fs_cuda.cpp ..
cp pair_eam_alloy_cuda.h ..
cp pair_eam_cuda.h ..
cp pair_eam_fs_cuda.h ..
fi
if (test -e ../pair_gran_hooke.cpp) then
cp pair_gran_hooke_cuda.cpp ..
cp pair_gran_hooke_cuda.h ..
fi
# The coul_long variants below are nested: they need both the outer and the
# inner base file to be present.
if (test -e ../pair_lj_charmm_coul_charmm.cpp) then
cp pair_lj_charmm_coul_charmm_cuda.cpp ..
cp pair_lj_charmm_coul_charmm_implicit_cuda.cpp ..
cp pair_lj_charmm_coul_charmm_cuda.h ..
cp pair_lj_charmm_coul_charmm_implicit_cuda.h ..
if (test -e ../pair_lj_charmm_coul_long.cpp) then
cp pair_lj_charmm_coul_long_cuda.cpp ..
cp pair_lj_charmm_coul_long_cuda.h ..
fi
fi
if (test -e ../pair_lj_class2.cpp) then
cp pair_lj_class2_coul_cut_cuda.cpp ..
cp pair_lj_class2_cuda.cpp ..
cp pair_lj_class2_coul_cut_cuda.h ..
cp pair_lj_class2_cuda.h ..
if (test -e ../pair_lj_class2_coul_long.cpp) then
cp pair_lj_class2_coul_long_cuda.cpp ..
cp pair_lj_class2_coul_long_cuda.h ..
fi
fi
# Unconditional copies: these sources are copied to the parent directory
# regardless of which other files are present there.
# NOTE(review): pppm_cuda.cpp is copied here as well as inside the
# 'test -e ../pppm.cpp' guard elsewhere in this listing, while no pppm_cuda.h is
# copied here - possibly a leftover line from the diff rendering; confirm.
cp atom_vec_atomic_cuda.cpp ..
cp atom_vec_charge_cuda.cpp ..
cp comm_cuda.cpp ..
cp compute_pe_cuda.cpp ..
cp compute_pressure_cuda.cpp ..
cp compute_temp_cuda.cpp ..
cp compute_temp_partial_cuda.cpp ..
cp domain_cuda.cpp ..
cp fix_addforce_cuda.cpp ..
cp fix_aveforce_cuda.cpp ..
cp fix_enforce2d_cuda.cpp ..
cp fix_gravity_cuda.cpp ..
cp fix_nh_cuda.cpp ..
cp fix_npt_cuda.cpp ..
cp fix_nve_cuda.cpp ..
cp fix_nvt_cuda.cpp ..
cp fix_set_force_cuda.cpp ..
cp fix_shake_cuda.cpp ..
cp fix_temp_berendsen_cuda.cpp ..
cp fix_temp_rescale_cuda.cpp ..
cp fix_temp_rescale_limit_cuda.cpp ..
cp fix_viscous_cuda.cpp ..
cp modify_cuda.cpp ..
cp neighbor_cuda.cpp ..
cp neigh_full_cuda.cpp ..
cp pair_buck_coul_cut_cuda.cpp ..
cp pair_buck_cuda.cpp ..
cp pair_lj96_cut_cuda.cpp ..
cp pair_lj_cut_coul_cut_cuda.cpp ..
cp pair_lj_cut_coul_debye_cuda.cpp ..
cp pair_lj_cut_cuda.cpp ..
cp pair_lj_cut_experimental_cuda.cpp ..
cp pair_lj_expand_cuda.cpp ..
cp pair_lj_gromacs_coul_gromacs_cuda.cpp ..
cp pair_lj_gromacs_cuda.cpp ..
cp pair_lj_smooth_cuda.cpp ..
cp pair_morse_cuda.cpp ..
cp pppm_cuda.cpp ..
cp verlet_cuda.cpp ..
cp cuda.cpp ..
cp cuda_neigh_list.cpp ..
# Matching headers for the sources above.
cp atom_vec_atomic_cuda.h ..
cp atom_vec_charge_cuda.h ..
cp comm_cuda.h ..
cp compute_pe_cuda.h ..
cp compute_pressure_cuda.h ..
cp compute_temp_cuda.h ..
cp compute_temp_partial_cuda.h ..
cp domain_cuda.h ..
cp fix_addforce_cuda.h ..
cp fix_aveforce_cuda.h ..
cp fix_enforce2d_cuda.h ..
cp fix_gravity_cuda.h ..
cp fix_nh_cuda.h ..
cp fix_npt_cuda.h ..
cp fix_nve_cuda.h ..
cp fix_nvt_cuda.h ..
cp fix_set_force_cuda.h ..
cp fix_shake_cuda.h ..
cp fix_temp_berendsen_cuda.h ..
cp fix_temp_rescale_cuda.h ..
cp fix_temp_rescale_limit_cuda.h ..
cp fix_viscous_cuda.h ..
cp modify_cuda.h ..
cp neighbor_cuda.h ..
cp pair_buck_coul_cut_cuda.h ..
cp pair_buck_cuda.h ..
cp pair_lj96_cut_cuda.h ..
cp pair_lj_cut_coul_cut_cuda.h ..
cp pair_lj_cut_coul_debye_cuda.h ..
cp pair_lj_cut_cuda.h ..
cp pair_lj_cut_experimental_cuda.h ..
cp pair_lj_expand_cuda.h ..
cp pair_lj_gromacs_coul_gromacs_cuda.h ..
cp pair_lj_gromacs_cuda.h ..
cp pair_lj_smooth_cuda.h ..
cp pair_morse_cuda.h ..
cp verlet_cuda.h ..
cp cuda.h ..
@ -42,26 +200,136 @@ if (test $1 = 1) then
# Uninstall branch: taken when the first script argument equals 0.
elif (test $1 = 0) then
# Makefile.package is only cleaned up when it exists.
if (test -e ../Makefile.package) then
# Remove the include of lib/cuda/Makefile.common and the exact flag strings
# written by the install branch.
# NOTE(review): two generations of cleanup commands appear here back to back,
# presumably removed and added diff lines shown together - confirm.
sed -i -e '/include ..\/..\/lib\/cuda\/Makefile.common/d' ../Makefile.package
sed -i -e 's/-llammpscuda -lcuda -lcudart -lrt //' ../Makefile.package
sed -i -e 's/-I..\/..\/lib\/cuda -I$(CUDA_INSTALL_PATH)\/include //' ../Makefile.package
sed -i -e 's/-L..\/..\/lib\/cuda -L$(CUDA_INSTALL_PATH)\/lib64 -L$(CUDA_INSTALL_PATH)\/lib $(USRLIB_CONDITIONAL) -DLMP_USER_CUDA //' ../Makefile.package
# Pattern-based cleanup: drop include lines mentioning cuda and any
# blank-delimited word (followed by a space) containing 'cuda', 'CUDA' or 'lrt'.
sed -i -e '/^include.*cuda.*$/d' ../Makefile.package
sed -i -e 's/[^ \t]*cuda[^ \t]* //g' ../Makefile.package
sed -i -e 's/[^ \t]*CUDA[^ \t]* //g' ../Makefile.package
sed -i -e 's/[^ \t]*lrt[^ \t]* //g' ../Makefile.package
fi
# Remove every file this package may have copied into the parent directory.
# 'rm -f' is used because several of these files are only copied when the
# matching base source exists at install time; without -f each file that was
# never installed would produce a "No such file or directory" error.
rm -f ../atom_vec_angle_cuda.cpp
rm -f ../atom_vec_atomic_cuda.cpp
rm -f ../atom_vec_charge_cuda.cpp
rm -f ../atom_vec_full_cuda.cpp
rm -f ../comm_cuda.cpp
rm -f ../compute_pe_cuda.cpp
rm -f ../compute_pressure_cuda.cpp
rm -f ../compute_temp_cuda.cpp
rm -f ../compute_temp_partial_cuda.cpp
rm -f ../domain_cuda.cpp
rm -f ../fft3d_cuda.cpp
rm -f ../fft3d_wrap_cuda.cpp
rm -f ../fix_addforce_cuda.cpp
rm -f ../fix_aveforce_cuda.cpp
rm -f ../fix_enforce2d_cuda.cpp
rm -f ../fix_freeze_cuda.cpp
rm -f ../fix_gravity_cuda.cpp
rm -f ../fix_nh_cuda.cpp
rm -f ../fix_npt_cuda.cpp
rm -f ../fix_nve_cuda.cpp
rm -f ../fix_nvt_cuda.cpp
rm -f ../fix_set_force_cuda.cpp
rm -f ../fix_shake_cuda.cpp
rm -f ../fix_temp_berendsen_cuda.cpp
rm -f ../fix_temp_rescale_cuda.cpp
rm -f ../fix_temp_rescale_limit_cuda.cpp
rm -f ../fix_viscous_cuda.cpp
rm -f ../modify_cuda.cpp
rm -f ../neighbor_cuda.cpp
rm -f ../neigh_full_cuda.cpp
rm -f ../pair_born_coul_long_cuda.cpp
rm -f ../pair_buck_coul_cut_cuda.cpp
rm -f ../pair_buck_coul_long_cuda.cpp
rm -f ../pair_buck_cuda.cpp
rm -f ../pair_cg_cmm_coul_cut_cuda.cpp
rm -f ../pair_cg_cmm_coul_debye_cuda.cpp
rm -f ../pair_cg_cmm_coul_long_cuda.cpp
rm -f ../pair_cg_cmm_cuda.cpp
rm -f ../pair_eam_alloy_cuda.cpp
rm -f ../pair_eam_cuda.cpp
rm -f ../pair_eam_fs_cuda.cpp
rm -f ../pair_gran_hooke_cuda.cpp
rm -f ../pair_lj96_cut_cuda.cpp
rm -f ../pair_lj_charmm_coul_charmm_cuda.cpp
rm -f ../pair_lj_charmm_coul_charmm_implicit_cuda.cpp
rm -f ../pair_lj_charmm_coul_long_cuda.cpp
rm -f ../pair_lj_class2_coul_cut_cuda.cpp
rm -f ../pair_lj_class2_coul_long_cuda.cpp
rm -f ../pair_lj_class2_cuda.cpp
rm -f ../pair_lj_cut_coul_cut_cuda.cpp
rm -f ../pair_lj_cut_coul_debye_cuda.cpp
rm -f ../pair_lj_cut_coul_long_cuda.cpp
rm -f ../pair_lj_cut_cuda.cpp
rm -f ../pair_lj_cut_experimental_cuda.cpp
rm -f ../pair_lj_expand_cuda.cpp
rm -f ../pair_lj_gromacs_coul_gromacs_cuda.cpp
rm -f ../pair_lj_gromacs_cuda.cpp
rm -f ../pair_lj_smooth_cuda.cpp
rm -f ../pair_morse_cuda.cpp
rm -f ../pppm_cuda.cpp
rm -f ../verlet_cuda.cpp
rm -f ../cuda.cpp
rm -f ../cuda_neigh_list.cpp
# Matching headers.
rm -f ../atom_vec_angle_cuda.h
rm -f ../atom_vec_atomic_cuda.h
rm -f ../atom_vec_charge_cuda.h
rm -f ../atom_vec_full_cuda.h
rm -f ../comm_cuda.h
rm -f ../compute_pe_cuda.h
rm -f ../compute_pressure_cuda.h
rm -f ../compute_temp_cuda.h
rm -f ../compute_temp_partial_cuda.h
rm -f ../domain_cuda.h
rm -f ../fft3d_cuda.h
rm -f ../fft3d_wrap_cuda.h
rm -f ../fix_addforce_cuda.h
rm -f ../fix_aveforce_cuda.h
rm -f ../fix_enforce2d_cuda.h
rm -f ../fix_freeze_cuda.h
rm -f ../fix_gravity_cuda.h
rm -f ../fix_nh_cuda.h
rm -f ../fix_npt_cuda.h
rm -f ../fix_nve_cuda.h
rm -f ../fix_nvt_cuda.h
rm -f ../fix_set_force_cuda.h
rm -f ../fix_shake_cuda.h
rm -f ../fix_temp_berendsen_cuda.h
rm -f ../fix_temp_rescale_cuda.h
rm -f ../fix_temp_rescale_limit_cuda.h
rm -f ../fix_viscous_cuda.h
rm -f ../modify_cuda.h
rm -f ../neighbor_cuda.h
rm -f ../pair_born_coul_long_cuda.h
rm -f ../pair_buck_coul_cut_cuda.h
rm -f ../pair_buck_coul_long_cuda.h
rm -f ../pair_buck_cuda.h
rm -f ../pair_cg_cmm_coul_cut_cuda.h
rm -f ../pair_cg_cmm_coul_debye_cuda.h
rm -f ../pair_cg_cmm_coul_long_cuda.h
rm -f ../pair_cg_cmm_cuda.h
rm -f ../pair_eam_alloy_cuda.h
rm -f ../pair_eam_cuda.h
rm -f ../pair_eam_fs_cuda.h
rm -f ../pair_gran_hooke_cuda.h
rm -f ../pair_lj96_cut_cuda.h
rm -f ../pair_lj_charmm_coul_charmm_cuda.h
rm -f ../pair_lj_charmm_coul_charmm_implicit_cuda.h
rm -f ../pair_lj_charmm_coul_long_cuda.h
rm -f ../pair_lj_class2_coul_cut_cuda.h
rm -f ../pair_lj_class2_coul_long_cuda.h
rm -f ../pair_lj_class2_cuda.h
rm -f ../pair_lj_cut_coul_cut_cuda.h
rm -f ../pair_lj_cut_coul_debye_cuda.h
rm -f ../pair_lj_cut_coul_long_cuda.h
rm -f ../pair_lj_cut_cuda.h
rm -f ../pair_lj_cut_experimental_cuda.h
rm -f ../pair_lj_expand_cuda.h
rm -f ../pair_lj_gromacs_coul_gromacs_cuda.h
rm -f ../pair_lj_gromacs_cuda.h
rm -f ../pair_lj_smooth_cuda.h
rm -f ../pair_morse_cuda.h
rm -f ../pppm_cuda.h
rm -f ../verlet_cuda.h
rm -f ../cuda.h

View File

@ -0,0 +1,476 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include "atom_vec_angle_cuda.h"
#include "comm_cuda_cu.h"
#include "atom_vec_angle_cuda_cu.h"
#include "atom.h"
#include "domain.h"
#include "modify.h"
#include "fix.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
#include "universe.h"
#include "comm.h"
using namespace LAMMPS_NS;
#define DELTA 10000
#define BUFFACTOR 1.5
#define BUFEXTRA 1000
#define NCUDAEXCHANGE 12 //nextra x y z vx vy vz tag type mask image molecule
#define BUF_FLOAT double
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Build the CUDA variant of atom style "angle".
   The Cuda object is taken from the LAMMPS instance; when it is NULL the
   problem is reported through error->all(). All exchange bookkeeping
   (copy lists, their capacity, init flag) starts out empty.
------------------------------------------------------------------------- */
AtomVecAngleCuda::AtomVecAngleCuda(LAMMPS *lmp, int narg, char **arg) :
  AtomVecAngle(lmp, narg, arg),
  cuda(lmp->cuda),
  cuda_init_done(false),
  copylist(NULL),
  copylist2(NULL),
  cu_copylist(NULL),
  max_nsend(0)
{
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  // inherited members (not declared by this class)
  maxsend = 0;
  cudable = true;
}
void AtomVecAngleCuda::grow_copylist(int new_max_nsend)
{
max_nsend=new_max_nsend;
delete cu_copylist;
delete [] copylist2;
if(copylist) CudaWrapper_FreePinnedHostData((void*) copylist);
copylist = (int*) CudaWrapper_AllocPinnedHostData(max_nsend*sizeof(int),false);
copylist2 = new int[max_nsend];
cu_copylist = new cCudaData<int, int, xx > (copylist, max_nsend);
}
/* ----------------------------------------------------------------------
   Resize the comm send buffer so that it holds BUFFACTOR*n + BUFEXTRA doubles.
   This class must be able to grow the buffer itself because the exchange
   data is copied from the GPU as a whole.

   n         requested number of doubles (scaled by BUFFACTOR)
   buf_send  address of the buffer pointer; updated to the new allocation
   flag      non-zero: keep the existing contents, zero: contents are dropped

   *maxsend is updated to static_cast<int>(BUFFACTOR * n).
   When cuda->pinned is set the buffer is managed through the
   CudaWrapper_*PinnedHostData calls, otherwise through memory->s*alloc.
------------------------------------------------------------------------- */
void AtomVecAngleCuda::grow_send(int n,double** buf_send,int flag)
{
  // capacity (in doubles) the old buffer is taken to have, as in the
  // allocation calls below: maxsend + BUFEXTRA
  const int old_maxsend = *maxsend+BUFEXTRA;
  *maxsend = static_cast<int> (BUFFACTOR * n);
  const int new_maxsend = *maxsend+BUFEXTRA;

  if (flag) {
    if (cuda->pinned) {
      double* old_buf = *buf_send;
      double* new_buf = (double*) CudaWrapper_AllocPinnedHostData(new_maxsend*sizeof(double),false);
      if (old_buf) {
        // copy straight into the new buffer; never read a NULL source and
        // never write more than the new buffer can hold
        const int ncopy = old_maxsend < new_maxsend ? old_maxsend : new_maxsend;
        memcpy((void*) new_buf,(void*) old_buf,ncopy*sizeof(double));
        CudaWrapper_FreePinnedHostData((void*) old_buf);
      }
      *buf_send = new_buf;
    } else {
      *buf_send = (double *)
        memory->srealloc(*buf_send,new_maxsend*sizeof(double),
                         "comm:buf_send");
    }
  } else {
    if (cuda->pinned) {
      if (*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
      *buf_send = (double*) CudaWrapper_AllocPinnedHostData(new_maxsend*sizeof(double),false);
    } else {
      memory->sfree(*buf_send);
      *buf_send = (double *) memory->smalloc(new_maxsend*sizeof(double),
                                             "comm:buf_send");
    }
  }
}
/* ----------------------------------------------------------------------
   Grow the per-atom arrays through AtomVecAngle::grow(n).
   Once cuda->finished_setup is set, the call is bracketed by
   cuda->downloadAll() before and cuda->checkResize() + cuda->uploadAll()
   after the host-side grow.
------------------------------------------------------------------------- */
void AtomVecAngleCuda::grow_both(int n)
{
  if (cuda->finished_setup) cuda->downloadAll();

  AtomVecAngle::grow(n);

  if (!cuda->finished_setup) return;
  cuda->checkResize();
  cuda->uploadAll();
}
/* ----------------------------------------------------------------------
   Pack forward-communication data.
   Usually this should not be called since comm->communicate handles the
   communication if only positions are exchanged.
   Delegates to AtomVecAngle::pack_comm until setup has finished or while
   cuda->oncpu is set; otherwise Cuda_CommCuda_PackComm does the packing.
   Returns the packed size; when X_FLOAT and double differ in size a
   non-zero count is rescaled to units of double.
------------------------------------------------------------------------- */
int AtomVecAngleCuda::pack_comm(int n, int* iswap, double *buf,
                                int pbc_flag, int *pbc)
{
  if (!cuda->finished_setup || cuda->oncpu)
    return AtomVecAngle::pack_comm(n,iswap,buf,pbc_flag,pbc);

  int nvalues = Cuda_CommCuda_PackComm(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
  if (nvalues && (sizeof(X_FLOAT)!=sizeof(double)))
    nvalues = (nvalues+1)*sizeof(X_FLOAT)/sizeof(double);
  return nvalues;
}
/* ----------------------------------------------------------------------
   Pack forward-communication data including velocities.
   Usually this should not be called since comm->communicate handles the
   communication if only positions are exchanged.
   Delegates to AtomVecAngle::pack_comm_vel until setup has finished or
   while cuda->oncpu is set; otherwise Cuda_CommCuda_PackCommVel is used.
   Returns the packed size; when X_FLOAT and double differ in size a
   non-zero count is rescaled to units of double.
------------------------------------------------------------------------- */
int AtomVecAngleCuda::pack_comm_vel(int n, int* iswap, double *buf,
                                    int pbc_flag, int *pbc)
{
  if (!cuda->finished_setup || cuda->oncpu)
    return AtomVecAngle::pack_comm_vel(n,iswap,buf,pbc_flag,pbc);

  int nvalues = Cuda_CommCuda_PackCommVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
  if (nvalues && (sizeof(X_FLOAT)!=sizeof(double)))
    nvalues = (nvalues+1)*sizeof(X_FLOAT)/sizeof(double);
  return nvalues;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Unpack forward-communication data.
   Usually this should not be called since comm->communicate handles the
   communication if only positions are exchanged.
   Uses Cuda_CommCuda_UnpackComm once setup has finished and cuda->oncpu is
   not set; otherwise the AtomVecAngle implementation.
------------------------------------------------------------------------- */
void AtomVecAngleCuda::unpack_comm(int n, int first, double *buf)
{
  if (cuda->finished_setup && !cuda->oncpu)
    Cuda_CommCuda_UnpackComm(&cuda->shared_data,n,first,(void*)buf);
  else
    AtomVecAngle::unpack_comm(n,first,buf);
}
/* ----------------------------------------------------------------------
   Unpack forward-communication data including velocities.
   Usually this should not be called since comm->communicate handles the
   communication if only positions are exchanged.
   Uses Cuda_CommCuda_UnpackCommVel once setup has finished and cuda->oncpu
   is not set; otherwise the AtomVecAngle implementation.
------------------------------------------------------------------------- */
void AtomVecAngleCuda::unpack_comm_vel(int n, int first, double *buf)
{
  if (cuda->finished_setup && !cuda->oncpu)
    Cuda_CommCuda_UnpackCommVel(&cuda->shared_data,n,first,(void*)buf);
  else
    AtomVecAngle::unpack_comm_vel(n,first,buf);
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Pack the forces of atoms first .. first+n-1 for reverse communication.
   Usually this should not be called since comm->communicate handles the
   communication if only forces are exchanged.
   On the CUDA path cuda->cu_f is downloaded before and uploaded after the
   host-side packing loop. Returns the number of values written (3 per atom).
------------------------------------------------------------------------- */
int AtomVecAngleCuda::pack_reverse(int n, int first, double *buf)
{
  if (!cuda->finished_setup || cuda->oncpu)
    return AtomVecAngle::pack_reverse(n,first,buf);

  cuda->cu_f->download();

  int nvalues = 0;
  const int end = first + n;
  for (int iatom = first; iatom < end; ++iatom)
    for (int d = 0; d < 3; ++d)
      buf[nvalues++] = f[iatom][d];

  cuda->cu_f->upload();
  return nvalues;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Add the received force contributions in buf onto the atoms named in list.
   Usually this should not be called since comm->communicate handles the
   communication if only forces are exchanged.
   On the CUDA path cuda->cu_f is downloaded before and uploaded after the
   host-side accumulation loop.
------------------------------------------------------------------------- */
void AtomVecAngleCuda::unpack_reverse(int n, int *list, double *buf)
{
  if (!cuda->finished_setup || cuda->oncpu) {
    AtomVecAngle::unpack_reverse(n,list,buf);
    return;
  }

  cuda->cu_f->download();

  int nread = 0;
  for (int k = 0; k < n; ++k) {
    const int j = list[k];
    for (int d = 0; d < 3; ++d)
      f[j][d] += buf[nread++];
  }

  cuda->cu_f->upload();
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Pack border atoms.
   Uses Cuda_AtomVecAngleCuda_PackBorder once setup has finished and
   cuda->oncpu is not set; otherwise AtomVecAngle::pack_border.
   Returns whatever the selected implementation returns.
------------------------------------------------------------------------- */
int AtomVecAngleCuda::pack_border(int n, int *iswap, double *buf,
                                  int pbc_flag, int *pbc)
{
  if (cuda->finished_setup && !cuda->oncpu)
    return Cuda_AtomVecAngleCuda_PackBorder(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);

  return AtomVecAngle::pack_border(n,iswap,buf,pbc_flag,pbc);
}
/* ----------------------------------------------------------------------
   Pack border atoms including velocities.
   Uses Cuda_AtomVecAngleCuda_PackBorderVel once setup has finished and
   cuda->oncpu is not set; otherwise AtomVecAngle::pack_border_vel.
   Returns whatever the selected implementation returns.
------------------------------------------------------------------------- */
int AtomVecAngleCuda::pack_border_vel(int n, int *iswap, double *buf,
                                      int pbc_flag, int *pbc)
{
  if (cuda->finished_setup && !cuda->oncpu)
    return Cuda_AtomVecAngleCuda_PackBorderVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);

  return AtomVecAngle::pack_border_vel(n,iswap,buf,pbc_flag,pbc);
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Unpack n border atoms starting at index first.
   Host fallback until setup has finished or while cuda->oncpu is set.
   On the CUDA path the arrays are grown (grow_both(0)) until
   nghost + nlocal + n is below cuda->shared_data.atom.nmax, then
   Cuda_AtomVecAngleCuda_UnpackBorder is called; a non-zero result is only
   reported on stdout.
------------------------------------------------------------------------- */
void AtomVecAngleCuda::unpack_border(int n, int first, double *buf)
{
  if (!cuda->finished_setup || cuda->oncpu) {
    AtomVecAngle::unpack_border(n,first,buf);
    return;
  }

  // ensure there is enough space on device to unpack data
  while (atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax)
    grow_both(0);

  const int failed = Cuda_AtomVecAngleCuda_UnpackBorder(&cuda->shared_data,n,first,(void*)buf);
  if (failed)
    printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");
}
/* ----------------------------------------------------------------------
   Unpack n border atoms including velocities, starting at index first.
   Host fallback until setup has finished or while cuda->oncpu is set.
   On the CUDA path the arrays are grown (grow_both(0)) until
   nghost + nlocal + n is below cuda->shared_data.atom.nmax, then
   Cuda_AtomVecAngleCuda_UnpackBorderVel is called; a non-zero result is
   only reported on stdout.
------------------------------------------------------------------------- */
void AtomVecAngleCuda::unpack_border_vel(int n, int first, double *buf)
{
  if (!cuda->finished_setup || cuda->oncpu) {
    AtomVecAngle::unpack_border_vel(n,first,buf);
    return;
  }

  // ensure there is enough space on device to unpack data
  while (atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax)
    grow_both(0);

  const int failed = Cuda_AtomVecAngleCuda_UnpackBorderVel(&cuda->shared_data,n,first,(void*)buf);
  if (failed)
    printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");
}
/* ----------------------------------------------------------------------
pack data for atom I for sending to another proc
xyz must be 1st 3 values, so comm::exchange() can test on them
------------------------------------------------------------------------- */
// Pack all atoms that leave this sub-domain along dimension `dim`.
// NOTE: on the CUDA path `buf` is reinterpreted as double** (the address of the
// send-buffer pointer) so the buffer can be reallocated from here.
// Buffer layout written by this function, as far as visible here:
//   [0]               number of atoms sent (nsend_atoms)
//   [1..nsend_atoms]  first the atom indices produced by
//                     Cuda_AtomVecAngleCuda_PackExchangeList, later overwritten
//                     with the count of extra values appended per atom (nextra)
//   [.. m)            data written by Cuda_AtomVecAngleCuda_PackExchange
//                     (presumably NCUDAEXCHANGE values per atom - TODO confirm),
//                     followed by bond/angle/special/fix data appended on the host
// Returns the number of doubles packed, or 0 when that number is exactly 1.
int AtomVecAngleCuda::pack_exchange(int dim, double *buf)
{
// host fallback
if(cuda->oncpu)
return AtomVecAngle::pack_exchange(dim,buf);
// (re)run the device-side init on first use and whenever the box changes
if(not cuda_init_done||domain->box_change)
{
Cuda_AtomVecAngleCuda_Init(&cuda->shared_data);
cuda_init_done=true;
}
double** buf_pointer=(double**) buf;
// make sure a buffer exists and *maxsend is at least nghost; grow_send sets
// *maxsend to BUFFACTOR*n, the next line then overwrites it with max(nghost,old value)
if(*maxsend<atom->nghost || *buf_pointer==NULL)
{
grow_send(atom->nghost>*maxsend?atom->nghost:*maxsend,buf_pointer,0);
*maxsend=atom->nghost>*maxsend?atom->nghost:*maxsend;
}
// copy lists are created lazily
if(max_nsend==0) grow_copylist(200);
int nsend_atoms = Cuda_AtomVecAngleCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100);
// buffer too small for the per-atom device data: grow it (contents dropped) and rebuild the list
if(nsend_atoms*NCUDAEXCHANGE>*maxsend)
{
grow_send((int) (nsend_atoms+100)*NCUDAEXCHANGE,buf_pointer,0);
Cuda_AtomVecAngleCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
}
// nlocal: number of owned atoms that remain after the exchange
int nlocal=atom->nlocal-nsend_atoms;
// copylist2[k] describes tail slot nlocal+k: 1 = atom stays, -1 = atom is itself being sent
for(int i=0;i<nsend_atoms;i++) copylist2[i]=1;
for(int j=1;j<nsend_atoms+1;j++)
{
int i = static_cast <int> ((*buf_pointer)[j]);
if(i>=nlocal) copylist2[i-nlocal]=-1;
}
// for every sent atom with index < nlocal pick the next staying tail slot;
// copylist[j-1] stores that slot index (used below as the first argument of copy/copy_arrays)
int actpos=0;
for(int j=1;j<nsend_atoms+1;j++)
{
int i = static_cast <int> ((*buf_pointer)[j]);
if(i<nlocal)
{
while(copylist2[actpos]==-1) actpos++;
copylist[j-1]=nlocal+actpos;
actpos++;
}
}
cu_copylist->upload();
cuda->shared_data.atom.nlocal=nlocal;
// m is the next free position in the buffer after the device-side packing
int m = Cuda_AtomVecAngleCuda_PackExchange(&cuda->shared_data,nsend_atoms,*buf_pointer,cu_copylist->dev_data());
// wall-clock time of the host-side packing loop is accumulated into cuda_timings.comm_exchange_cpu_pack
timespec time1,time2;
clock_gettime(CLOCK_REALTIME,&time1);
double* buf_p=*buf_pointer;
for(int j=0;j<nsend_atoms;j++)
{
int i=static_cast <int> (buf_p[j+1]);
// nextra counts the values appended for this atom by the host code below
int nextra=0;
int k;
buf_p[m++] = num_bond[i];
for (k = 0; k < num_bond[i]; k++) {
buf_p[m++] = bond_type[i][k];
buf_p[m++] = bond_atom[i][k];
}
nextra+=2*num_bond[i]+1;
// growth is checked after writing; grow_send(...,1) keeps the contents and may move the buffer
if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
buf_p[m++] = num_angle[i];
for (k = 0; k < num_angle[i]; k++) {
buf_p[m++] = angle_type[i][k];
buf_p[m++] = angle_atom1[i][k];
buf_p[m++] = angle_atom2[i][k];
buf_p[m++] = angle_atom3[i][k];
}
nextra+=4*num_angle[i]+1;
if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
buf_p[m++] = nspecial[i][0];
buf_p[m++] = nspecial[i][1];
buf_p[m++] = nspecial[i][2];
for (k = 0; k < nspecial[i][2]; k++) buf_p[m++] = special[i][k];
nextra+=nspecial[i][2]+3;
if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
// per-atom data of fixes registered in atom->extra_grow
if (atom->nextra_grow)
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
{
int dm= modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf_p[m]);
m+=dm;
nextra+=dm;
if(i<nlocal)modify->fix[atom->extra_grow[iextra]]->copy_arrays(copylist[j],i);
if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
}
// presumably fills the hole at i with the tail atom copylist[j] - TODO confirm argument order of copy()
if(i<nlocal)AtomVecAngle::copy(copylist[j],i,1);
// slot j+1 no longer holds the atom index but the number of extra values
(*buf_pointer)[j+1] = nextra;
}
clock_gettime(CLOCK_REALTIME,&time2);
cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+=
time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
(*buf_pointer)[0] = nsend_atoms;
atom->nlocal-=nsend_atoms;
cuda->shared_data.atom.update_nlocal=2;
//printf("End Pack Exchange\n");
// a packed size of exactly 1 is reported as 0
if(m==1) return 0;
return m;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Unpack atoms received during exchange.
   Host fallback while cuda->oncpu is set. Otherwise one message is
   processed, or two when comm->procgrid[dim] > 2, with dim taken from
   cuda->shared_data.exchange_dim. For each message
   Cuda_AtomVecAngleCuda_UnpackExchange fills the copy list with the local
   index of every accepted atom (entries > -1); the bond/angle/special/fix
   data that follows the nsend_atoms*NCUDAEXCHANGE+1 leading values is then
   unpacked on the host for accepted atoms and skipped for the others.
   Returns the total number of doubles consumed from buf.
------------------------------------------------------------------------- */
int AtomVecAngleCuda::unpack_exchange(double *buf)
{
  if (cuda->oncpu)
    return AtomVecAngle::unpack_exchange(buf);

  int dim = cuda->shared_data.exchange_dim;
  if (domain->box_change)
    Cuda_AtomVecAngleCuda_Init(&cuda->shared_data);

  // cu_copylist is dereferenced below even for an empty message, so make sure
  // the copy lists exist (same lazy creation as in pack_exchange)
  if (max_nsend == 0) grow_copylist(200);

  int mfirst = 0;
  for (int pi = 0; pi < (comm->procgrid[dim]>2?2:1); pi++) {
    int nlocal = atom->nlocal;
    int nsend_atoms = static_cast<int> (buf[0]);
    if (nsend_atoms > max_nsend) grow_copylist(nsend_atoms+100);

    // ensure there is enough space on device to unpack data
    if (nlocal+nsend_atoms+atom->nghost >= atom->nmax)
      grow_both(nlocal+nsend_atoms*2+atom->nghost);

    int naccept = Cuda_AtomVecAngleCuda_UnpackExchange(&cuda->shared_data,nsend_atoms,buf,cu_copylist->dev_data());
    cu_copylist->download();

    // host-side data starts after the count and the per-atom device values
    int m = nsend_atoms*NCUDAEXCHANGE + 1;
    nlocal += naccept;

    timespec time1,time2;
    clock_gettime(CLOCK_REALTIME,&time1);

    for (int j = 0; j < nsend_atoms; j++) {
      if (copylist[j] > -1) {
        int k;
        int i = copylist[j];

        num_bond[i] = static_cast<int> (buf[m++]);
        for (k = 0; k < num_bond[i]; k++) {
          bond_type[i][k] = static_cast<int> (buf[m++]);
          bond_atom[i][k] = static_cast<int> (buf[m++]);
        }

        num_angle[i] = static_cast<int> (buf[m++]);
        for (k = 0; k < num_angle[i]; k++) {
          angle_type[i][k] = static_cast<int> (buf[m++]);
          angle_atom1[i][k] = static_cast<int> (buf[m++]);
          angle_atom2[i][k] = static_cast<int> (buf[m++]);
          angle_atom3[i][k] = static_cast<int> (buf[m++]);
        }

        nspecial[i][0] = static_cast<int> (buf[m++]);
        nspecial[i][1] = static_cast<int> (buf[m++]);
        nspecial[i][2] = static_cast<int> (buf[m++]);
        for (k = 0; k < nspecial[i][2]; k++)
          special[i][k] = static_cast<int> (buf[m++]);

        if (atom->nextra_grow)
          for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
            m += modify->fix[atom->extra_grow[iextra]]->
              unpack_exchange(i,&buf[m]);
      } else {
        // atom not accepted: buf[j+1] holds the number of extra values to skip
        m += static_cast <int> (buf[j+1]);
      }
    }

    clock_gettime(CLOCK_REALTIME,&time2);
    cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+=
      time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;

    cuda->shared_data.atom.nlocal=nlocal;
    cuda->shared_data.atom.update_nlocal=2;
    atom->nlocal=nlocal;

    // advance to the next message in the buffer
    mfirst += m;
    buf = &buf[m];
  }
  return mfirst;
}

View File

@ -0,0 +1,69 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef ATOM_CLASS
AtomStyle(angle/cuda,AtomVecAngleCuda)
#else
#ifndef LMP_ATOM_VEC_ANGLE_CUDA_H
#define LMP_ATOM_VEC_ANGLE_CUDA_H
#include "atom_vec_angle.h"
#include "cuda_data.h"
namespace LAMMPS_NS {
// Variant of AtomVecAngle that routes its pack/unpack methods through the
// Cuda_* helper functions; registered above as atom style "angle/cuda".
class AtomVecAngleCuda : public AtomVecAngle {
public:
AtomVecAngleCuda(class LAMMPS *, int, char **);
// NOTE(review): the destructor body is empty although grow_copylist() allocates
// copylist, copylist2 and cu_copylist - confirm whether they are released elsewhere.
virtual ~AtomVecAngleCuda() {}
// (re)allocate the copy lists for n entries; previous contents are discarded
void grow_copylist(int n);
// resize the send buffer *buf_send for n values; flag != 0 keeps the old contents
void grow_send(int n,double** buf_send,int flag);
// grow the per-atom arrays via AtomVecAngle::grow(n), with a download/upload around it once setup is finished
void grow_both(int n);
// forward communication
int pack_comm(int, int *, double *, int, int *);
int pack_comm_vel(int, int *, double *, int, int *);
void unpack_comm(int, int, double *);
void unpack_comm_vel(int, int, double *);
// reverse (force) communication
int pack_reverse(int, int, double *);
void unpack_reverse(int, int *, double *);
// border atoms
int pack_border(int, int *, double *, int, int *);
int pack_border_vel(int, int *, double *, int, int *);
void unpack_border(int, int, double *);
void unpack_border_vel(int, int, double *);
// atom exchange between sub-domains
int pack_exchange(int, double *);
int unpack_exchange(double *);
private:
// Cuda object taken from the LAMMPS instance in the constructor
class Cuda *cuda;
// true once Cuda_AtomVecAngleCuda_Init has been called from pack_exchange
bool cuda_init_done;
// host copy list, allocated through CudaWrapper_AllocPinnedHostData
int* copylist;
// host scratch array used while building copylist
int* copylist2;
// wrapper around copylist used for upload/download and dev_data()
cCudaData<int, int, xx >* cu_copylist;
// number of entries the copy lists were allocated for
int max_nsend;
};
}
#endif
#endif

View File

@ -0,0 +1,407 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include "atom_vec_atomic_cuda.h"
#include "comm_cuda_cu.h"
#include "atom_vec_atomic_cuda_cu.h"
#include "atom.h"
#include "domain.h"
#include "modify.h"
#include "fix.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
#include "comm.h"
using namespace LAMMPS_NS;
#define DELTA 10000
#define BUFFACTOR 1.5
#define BUFEXTRA 1000
#define NCUDAEXCHANGE 11 //nextra x y z vx vy vz tag type mask image
#define BUF_FLOAT double
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Build the CUDA variant of atom style "atomic".
   The Cuda object is taken from the LAMMPS instance; when it is NULL the
   problem is reported through error->all(). All exchange bookkeeping
   (copy lists, their capacity, init flag) starts out empty.
------------------------------------------------------------------------- */
AtomVecAtomicCuda::AtomVecAtomicCuda(LAMMPS *lmp, int narg, char **arg) :
  AtomVecAtomic(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  // flags and buffer bookkeeping
  maxsend        = 0;
  cudable        = true;
  cuda_init_done = false;

  // copy lists are allocated on demand by grow_copylist()
  max_nsend   = 0;
  cu_copylist = NULL;
  copylist    = NULL;
  copylist2   = NULL;
}
void AtomVecAtomicCuda::grow_copylist(int new_max_nsend)
{
max_nsend=new_max_nsend;
delete cu_copylist;
delete [] copylist2;
if(copylist) CudaWrapper_FreePinnedHostData((void*) copylist);
copylist = (int*) CudaWrapper_AllocPinnedHostData(max_nsend*sizeof(int),false);
copylist2 = new int[max_nsend];
cu_copylist = new cCudaData<int, int, xx > (copylist, max_nsend);
}
/* ----------------------------------------------------------------------
   Resize the comm send buffer so that it holds BUFFACTOR*n + BUFEXTRA doubles.

   n         requested number of doubles (scaled by BUFFACTOR)
   buf_send  address of the buffer pointer; updated to the new allocation
   flag      non-zero: keep the existing contents, zero: contents are dropped

   *maxsend is updated to static_cast<int>(BUFFACTOR * n).
   When cuda->pinned is set the buffer is managed through the
   CudaWrapper_*PinnedHostData calls, otherwise through memory->s*alloc.
------------------------------------------------------------------------- */
void AtomVecAtomicCuda::grow_send(int n,double** buf_send,int flag)
{
  // capacity (in doubles) the old buffer is taken to have, as in the
  // allocation calls below: maxsend + BUFEXTRA
  const int old_maxsend = *maxsend+BUFEXTRA;
  *maxsend = static_cast<int> (BUFFACTOR * n);
  const int new_maxsend = *maxsend+BUFEXTRA;

  if (flag) {
    if (cuda->pinned) {
      double* old_buf = *buf_send;
      double* new_buf = (double*) CudaWrapper_AllocPinnedHostData(new_maxsend*sizeof(double),false);
      if (old_buf) {
        // copy straight into the new buffer; never read a NULL source and
        // never write more than the new buffer can hold
        const int ncopy = old_maxsend < new_maxsend ? old_maxsend : new_maxsend;
        memcpy((void*) new_buf,(void*) old_buf,ncopy*sizeof(double));
        CudaWrapper_FreePinnedHostData((void*) old_buf);
      }
      *buf_send = new_buf;
    } else {
      *buf_send = (double *)
        memory->srealloc(*buf_send,new_maxsend*sizeof(double),
                         "comm:buf_send");
    }
  } else {
    if (cuda->pinned) {
      if (*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
      *buf_send = (double*) CudaWrapper_AllocPinnedHostData(new_maxsend*sizeof(double),false);
    } else {
      memory->sfree(*buf_send);
      *buf_send = (double *) memory->smalloc(new_maxsend*sizeof(double),
                                             "comm:buf_send");
    }
  }
}
/* ----------------------------------------------------------------------
   Grow the per-atom arrays through AtomVecAtomic::grow(n).
   Once cuda->finished_setup is set, the call is bracketed by
   cuda->downloadAll() before and cuda->checkResize() + cuda->uploadAll()
   after the host-side grow.
------------------------------------------------------------------------- */
void AtomVecAtomicCuda::grow_both(int n)
{
  if (cuda->finished_setup) cuda->downloadAll();

  AtomVecAtomic::grow(n);

  if (!cuda->finished_setup) return;
  cuda->checkResize();
  cuda->uploadAll();
}
/* ----------------------------------------------------------------------
   Pack forward-communication data.
   Delegates to AtomVecAtomic::pack_comm until setup has finished or while
   cuda->oncpu is set; otherwise Cuda_CommCuda_PackComm does the packing.
   Returns the packed size; when X_FLOAT and double differ in size a
   non-zero count is rescaled to units of double.
------------------------------------------------------------------------- */
int AtomVecAtomicCuda::pack_comm(int n, int* iswap, double *buf,
                                 int pbc_flag, int *pbc)
{
  if (!cuda->finished_setup || cuda->oncpu)
    return AtomVecAtomic::pack_comm(n,iswap,buf,pbc_flag,pbc);

  int nvalues = Cuda_CommCuda_PackComm(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
  if (nvalues && (sizeof(X_FLOAT)!=sizeof(double)))
    nvalues = (nvalues+1)*sizeof(X_FLOAT)/sizeof(double);
  return nvalues;
}
/* ----------------------------------------------------------------------
   Pack forward-communication data including velocities.
   Delegates to AtomVecAtomic::pack_comm_vel until setup has finished or
   while cuda->oncpu is set; otherwise Cuda_CommCuda_PackCommVel is used.
   Returns the packed size; when X_FLOAT and double differ in size a
   non-zero count is rescaled to units of double.
------------------------------------------------------------------------- */
int AtomVecAtomicCuda::pack_comm_vel(int n, int* iswap, double *buf,
                                     int pbc_flag, int *pbc)
{
  if (!cuda->finished_setup || cuda->oncpu)
    return AtomVecAtomic::pack_comm_vel(n,iswap,buf,pbc_flag,pbc);

  int nvalues = Cuda_CommCuda_PackCommVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
  if (nvalues && (sizeof(X_FLOAT)!=sizeof(double)))
    nvalues = (nvalues+1)*sizeof(X_FLOAT)/sizeof(double);
  return nvalues;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Unpack forward-communication data.
   Uses Cuda_CommCuda_UnpackComm once setup has finished and cuda->oncpu is
   not set; otherwise the AtomVecAtomic implementation.
------------------------------------------------------------------------- */
void AtomVecAtomicCuda::unpack_comm(int n, int first, double *buf)
{
  if (cuda->finished_setup && !cuda->oncpu)
    Cuda_CommCuda_UnpackComm(&cuda->shared_data,n,first,(void*)buf);
  else
    AtomVecAtomic::unpack_comm(n,first,buf);
}
// Velocity-carrying counterpart of unpack_comm.
// Uses Cuda_CommCuda_UnpackCommVel once CUDA setup has finished and the run is
// not on the CPU; in every other case the base class does the work.
void AtomVecAtomicCuda::unpack_comm_vel(int n, int first, double *buf)
{
  if (cuda->finished_setup && !cuda->oncpu) {
    Cuda_CommCuda_UnpackCommVel(&cuda->shared_data, n, first, (void*) buf);
  } else {
    AtomVecAtomic::unpack_comm_vel(n, first, buf);
  }
}
/* ---------------------------------------------------------------------- */
// Pack the forces of atoms [first, first+n) into buf for reverse communication.
// Before CUDA setup has finished, or while running on the CPU, the base class
// implementation is used.
// On the device path the host force array is refreshed from the device first
// (cuda->cu_f->download()) and pushed back afterwards, exactly as the
// charge/cuda and full/cuda atom styles do; without the download the loop
// could pack stale host-side forces.
// Returns the number of doubles written (3 per atom).
int AtomVecAtomicCuda::pack_reverse(int n, int first, double *buf)
{
  if(not cuda->finished_setup || cuda->oncpu)
    return AtomVecAtomic::pack_reverse(n,first,buf);

  int i,m,last;

  cuda->cu_f->download();
  m = 0;
  last = first + n;
  for (i = first; i < last; i++) {
    buf[m++] = f[i][0];
    buf[m++] = f[i][1];
    buf[m++] = f[i][2];
  }
  cuda->cu_f->upload();
  return m;
}
/* ---------------------------------------------------------------------- */
// Accumulate reverse-communicated forces from buf onto the atoms named in list.
// Before CUDA setup has finished, or while running on the CPU, the base class
// implementation is used.
// On the device path the host force array is downloaded before the
// accumulation and uploaded afterwards, matching the charge/cuda and full/cuda
// atom styles; without this the sums would be applied to stale host data and
// never reach the device.
void AtomVecAtomicCuda::unpack_reverse(int n, int *list, double *buf)
{
  if(not cuda->finished_setup || cuda->oncpu)
    {AtomVecAtomic::unpack_reverse(n,list,buf); return;}

  int i,j,m;

  m = 0;
  cuda->cu_f->download();
  for (i = 0; i < n; i++) {
    j = list[i];
    f[j][0] += buf[m++];
    f[j][1] += buf[m++];
    f[j][2] += buf[m++];
  }
  cuda->cu_f->upload();
}
/* ---------------------------------------------------------------------- */
// Pack border-atom data into buf.
// Delegates to AtomVecAtomic::pack_border before CUDA setup has finished or
// while running on the CPU; otherwise returns whatever
// Cuda_AtomVecAtomicCuda_PackBorder reports (it is handed the value *iswap).
int AtomVecAtomicCuda::pack_border(int n, int *iswap, double *buf,
                                   int pbc_flag, int *pbc)
{
  if (cuda->finished_setup && !cuda->oncpu) {
    return Cuda_AtomVecAtomicCuda_PackBorder(&cuda->shared_data, n, *iswap,
                                             (void*) buf, pbc, pbc_flag);
  }
  return AtomVecAtomic::pack_border(n, iswap, buf, pbc_flag, pbc);
}
// Velocity-carrying counterpart of pack_border.
// Delegates to AtomVecAtomic::pack_border_vel before CUDA setup has finished
// or while running on the CPU; otherwise returns whatever
// Cuda_AtomVecAtomicCuda_PackBorderVel reports (it is handed the value *iswap).
int AtomVecAtomicCuda::pack_border_vel(int n, int *iswap, double *buf,
                                       int pbc_flag, int *pbc)
{
  if (cuda->finished_setup && !cuda->oncpu) {
    return Cuda_AtomVecAtomicCuda_PackBorderVel(&cuda->shared_data, n, *iswap,
                                                (void*) buf, pbc, pbc_flag);
  }
  return AtomVecAtomic::pack_border_vel(n, iswap, buf, pbc_flag, pbc);
}
/* ---------------------------------------------------------------------- */
// Unpack n border atoms starting at index first.
// Delegates to the base class before CUDA setup has finished or while running
// on the CPU. Otherwise the arrays are grown (grow_both(0)) until
// nghost + nlocal + n is below cuda->shared_data.atom.nmax, and the unpacking
// is done by Cuda_AtomVecAtomicCuda_UnpackBorder. A non-zero result from that
// call is only reported on stdout.
void AtomVecAtomicCuda::unpack_border(int n, int first, double *buf)
{
  if (!cuda->finished_setup || cuda->oncpu) {
    AtomVecAtomic::unpack_border(n, first, buf);
    return;
  }

  while (atom->nghost + atom->nlocal + n >= cuda->shared_data.atom.nmax) {
    grow_both(0);
  }

  const int failed = Cuda_AtomVecAtomicCuda_UnpackBorder(&cuda->shared_data, n,
                                                         first, (void*) buf);
  if (failed) {
    printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");
  }
}
// Velocity-carrying counterpart of unpack_border.
// Delegates to the base class before CUDA setup has finished or while running
// on the CPU. Otherwise the arrays are grown (grow_both(0)) until
// nghost + nlocal + n is below cuda->shared_data.atom.nmax, and the unpacking
// is done by Cuda_AtomVecAtomicCuda_UnpackBorderVel. A non-zero result from
// that call is only reported on stdout.
void AtomVecAtomicCuda::unpack_border_vel(int n, int first, double *buf)
{
  if (!cuda->finished_setup || cuda->oncpu) {
    AtomVecAtomic::unpack_border_vel(n, first, buf);
    return;
  }

  while (atom->nghost + atom->nlocal + n >= cuda->shared_data.atom.nmax) {
    grow_both(0);
  }

  const int failed = Cuda_AtomVecAtomicCuda_UnpackBorderVel(&cuda->shared_data, n,
                                                            first, (void*) buf);
  if (failed) {
    printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");
  }
}
/* ----------------------------------------------------------------------
pack data for atom I for sending to another proc
xyz must be 1st 3 values, so comm::exchange() can test on them
------------------------------------------------------------------------- */
// Pack all atoms leaving this proc along dimension dim into the send buffer.
// On the CPU path (cuda->oncpu) this simply forwards to the base class.
// On the device path buf is NOT a data buffer: it is reinterpreted as a
// double** (pointer to the send-buffer pointer) so the buffer can be
// reallocated here via grow_send().
// Buffer layout written on the device path, as far as this function shows:
//   [0]                 number of atoms being sent
//   [1..nsend_atoms]    first the index of each leaving atom, later
//                       overwritten with the amount of extra fix data per atom
//   [..m)               data written by Cuda_AtomVecAtomicCuda_PackExchange,
//                       followed by the per-fix extra data
// Returns the number of buffer entries used, or 0 when only the count slot
// was written (m==1).
int AtomVecAtomicCuda::pack_exchange(int dim, double *buf)
{
if(cuda->oncpu)
return AtomVecAtomic::pack_exchange(dim,buf);
// (re)initialise the device side on first use and whenever the box changes
if(not cuda_init_done||domain->box_change)
{
Cuda_AtomVecAtomicCuda_Init(&cuda->shared_data);
cuda_init_done=true;
}
// buf really carries the address of the send-buffer pointer
double** buf_pointer=(double**) buf;
// make sure a buffer exists and that maxsend is at least nghost
if(*maxsend<atom->nghost || *buf_pointer==NULL)
{
grow_send(atom->nghost>*maxsend?atom->nghost:*maxsend,buf_pointer,0);
*maxsend=atom->nghost>*maxsend?atom->nghost:*maxsend;
}
// first call: allocate the copy lists with an initial capacity of 200
if(max_nsend==0) grow_copylist(200);
// build the list of leaving atoms; the return value is their count
int nsend_atoms = Cuda_AtomVecAtomicCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
if(nsend_atoms>max_nsend) {grow_copylist(nsend_atoms+100);}
// buffer too small for NCUDAEXCHANGE values per atom: regrow (contents are
// discarded, flag 0) and build the list again into the new buffer
if(nsend_atoms*NCUDAEXCHANGE>*maxsend)
{
grow_send((int) (nsend_atoms+100)*NCUDAEXCHANGE,buf_pointer,0);
Cuda_AtomVecAtomicCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
}
// number of local atoms that remain after the exchange
int nlocal=atom->nlocal-nsend_atoms;
// copylist2[k] describes slot nlocal+k at the tail of the local range:
// 1 = not in the send list, -1 = that atom is itself leaving
for(int i=0;i<nsend_atoms;i++) copylist2[i]=1;
for(int j=1;j<nsend_atoms+1;j++)
{
int i = static_cast <int> ((*buf_pointer)[j]);
if(i>=nlocal) copylist2[i-nlocal]=-1;
}
// for every leaving atom with index below nlocal, record in copylist the next
// tail slot that is not leaving
// NOTE(review): presumably the device kernel moves that tail atom into the
// vacated slot - confirm in Cuda_AtomVecAtomicCuda_PackExchange
int actpos=0;
for(int j=1;j<nsend_atoms+1;j++)
{
int i = static_cast <int> ((*buf_pointer)[j]);
if(i<nlocal)
{
while(copylist2[actpos]==-1) actpos++;
copylist[j-1]=nlocal+actpos;
actpos++;
}
}
cu_copylist->upload();
cuda->shared_data.atom.nlocal=nlocal;
// pack the per-atom data; m is the buffer position reached so far
int m = Cuda_AtomVecAtomicCuda_PackExchange(&cuda->shared_data,nsend_atoms,*buf_pointer,cu_copylist->dev_data());
// append the data of fixes that store per-atom state
if (atom->nextra_grow)
for(int j=0;j<nsend_atoms;j++)
{
int i=static_cast <int> ((*buf_pointer)[j+1]);
int nextra=0;
for (int iextra = 0; iextra < atom->nextra_grow; iextra++) {
int dm = modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&((*buf_pointer)[m]));
m+=dm;
nextra+=dm;
// keep the fix arrays consistent with the slot recorded in copylist
if(i<nlocal)modify->fix[atom->extra_grow[iextra]]->copy_arrays(copylist[j],i);
// grow with flag 1 so the data packed so far is preserved; *buf_pointer may
// change here, which is why it is dereferenced afresh on every access
if(m>*maxsend) grow_send(m,buf_pointer,1);
}
// slot j+1 now holds the amount of extra data instead of the atom index
(*buf_pointer)[j+1] = nextra;
}
(*buf_pointer)[0] = nsend_atoms;
atom->nlocal-=nsend_atoms;
// NOTE(review): the meaning of the value 2 is defined outside this file
cuda->shared_data.atom.update_nlocal=2;
if(m==1) return 0;//m is at least 1 in cuda since buf[0] contains number of atoms
return m;
}
/* ---------------------------------------------------------------------- */
// Unpack the atoms received during an exchange.
// On the CPU path (cuda->oncpu) this forwards to the base class. On the device
// path buf holds one message, or two back-to-back messages when
// comm->procgrid[dim] > 2, where dim is cuda->shared_data.exchange_dim.
// Each message starts with the number of atoms it carries (buf[0]); the
// per-atom block of NCUDAEXCHANGE values per atom is followed by the extra
// data of fixes that store per-atom state.
// Returns the total number of buffer entries consumed.
int AtomVecAtomicCuda::unpack_exchange(double *buf)
{
  if(cuda->oncpu)
    return AtomVecAtomic::unpack_exchange(buf);

  int dim=cuda->shared_data.exchange_dim;
  if(domain->box_change)
    Cuda_AtomVecAtomicCuda_Init(&cuda->shared_data);

  int mfirst=0;
  for(int pi=0;pi<(comm->procgrid[dim]>2?2:1);pi++)
  {
    int nlocal = atom->nlocal;
    int nsend_atoms=static_cast<int> (buf[0]);

    // make room for the copy list and for the incoming atoms
    if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100);
    if (nlocal+nsend_atoms+atom->nghost>=atom->nmax)
      grow_both(nlocal+nsend_atoms*2+atom->nghost);

    // the device call fills copylist; entries > -1 are used below as the
    // local index of an accepted atom
    int naccept = Cuda_AtomVecAtomicCuda_UnpackExchange(&cuda->shared_data,nsend_atoms,buf,cu_copylist->dev_data());
    cu_copylist->download();

    // position of the first extra-data entry of this message
    int m = nsend_atoms*NCUDAEXCHANGE + 1;
    nlocal+=naccept;

    if (atom->nextra_grow)
      for(int j=0;j<nsend_atoms;j++)
      {
        if(copylist[j]>-1)
        {
          for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
            m += modify->fix[atom->extra_grow[iextra]]->
              unpack_exchange(copylist[j],&buf[m]);
        }
        else
        {
          // atom not accepted: skip its extra data, whose length is in buf[j+1]
          m+=static_cast <int> (buf[j+1]);
        }
      }

    cuda->shared_data.atom.nlocal=nlocal;
    cuda->shared_data.atom.update_nlocal=2;
    atom->nlocal=nlocal;

    // advance to the next message, if any
    mfirst+=m;
    buf=&buf[m];
  }
  return mfirst;
}

View File

@ -0,0 +1,81 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef ATOM_CLASS
AtomStyle(atomic/cuda,AtomVecAtomicCuda)
#else
#ifndef LMP_ATOM_VEC_ATOMIC_CUDA_H
#define LMP_ATOM_VEC_ATOMIC_CUDA_H
#include "atom_vec_atomic.h"
#include "cuda_data.h"
namespace LAMMPS_NS {

// Atom style class derived from AtomVecAtomic that redeclares the
// communication, border and exchange pack/unpack entry points and adds
// helpers for growing the buffers they use.
class AtomVecAtomicCuda : public AtomVecAtomic {
 public:
  AtomVecAtomicCuda(class LAMMPS *lmp, int narg, char **arg);
  virtual ~AtomVecAtomicCuda() {}

  // buffer / array growth helpers
  void grow_copylist(int n);
  void grow_send(int n, double** buf_send, int flag);
  void grow_both(int n);

  // forward communication
  int pack_comm(int n, int *iswap, double *buf, int pbc_flag, int *pbc);
  int pack_comm_vel(int n, int *iswap, double *buf, int pbc_flag, int *pbc);
  void unpack_comm(int n, int first, double *buf);
  void unpack_comm_vel(int n, int first, double *buf);

  // reverse communication
  int pack_reverse(int n, int first, double *buf);
  void unpack_reverse(int n, int *list, double *buf);

  // border atoms
  int pack_border(int n, int *iswap, double *buf, int pbc_flag, int *pbc);
  int pack_border_vel(int n, int *iswap, double *buf, int pbc_flag, int *pbc);
  void unpack_border(int n, int first, double *buf);
  void unpack_border_vel(int n, int first, double *buf);

  // atom exchange between procs
  int pack_exchange(int dim, double *buf);
  int unpack_exchange(double *buf);

 private:
  class Cuda *cuda;                       // taken from the LAMMPS instance
  bool cuda_init_done;                    // set after the first device init in pack_exchange
  int* copylist;                          // host side of cu_copylist
  int* copylist2;                         // host-only scratch list
  cCudaData<int, int, xx >* cu_copylist;  // device wrapper around copylist
  int max_nsend;                          // capacity of the copy lists
};

}
#endif
#endif

View File

@ -0,0 +1,407 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include "atom_vec_charge_cuda.h"
#include "comm_cuda_cu.h"
#include "atom_vec_charge_cuda_cu.h"
#include "atom.h"
#include "domain.h"
#include "modify.h"
#include "fix.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
#include "comm.h"
using namespace LAMMPS_NS;
#define DELTA 10000
#define BUFFACTOR 1.5
#define BUFEXTRA 1000
#define NCUDAEXCHANGE 12 //nextra x y z vx vy vz tag type mask image q
#define BUF_FLOAT double
/* ---------------------------------------------------------------------- */
// Construct the charge/cuda atom style.
// Takes the Cuda object from the LAMMPS instance and aborts through
// error->all() when it is missing. The copy lists start out unallocated
// (max_nsend == 0).
AtomVecChargeCuda::AtomVecChargeCuda(LAMMPS *lmp, int narg, char **arg) :
  AtomVecCharge(lmp, narg, arg),
  cuda(lmp->cuda),
  cuda_init_done(false),
  copylist(NULL),
  copylist2(NULL),
  cu_copylist(NULL),
  max_nsend(0)
{
  if (cuda == NULL) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  maxsend = 0;
  cudable = true;
}
void AtomVecChargeCuda::grow_copylist(int new_max_nsend)
{
max_nsend=new_max_nsend;
delete cu_copylist;
delete [] copylist2;
if(copylist) CudaWrapper_FreePinnedHostData((void*) copylist);
copylist = (int*) CudaWrapper_AllocPinnedHostData(max_nsend*sizeof(int),false);
copylist2 = new int[max_nsend];
cu_copylist = new cCudaData<int, int, xx > (copylist, max_nsend);
}
void AtomVecChargeCuda::grow_send(int n,double** buf_send,int flag) //need to be able to grow the comm send_buffer since the array sahll be copied from the gpu in whole
{
int old_maxsend=*maxsend+BUFEXTRA;
*maxsend = static_cast<int> (BUFFACTOR * n);
if (flag)
{
if(cuda->pinned)
{
double* tmp = new double[old_maxsend];
memcpy((void*) tmp,(void*) *buf_send,old_maxsend*sizeof(double));
if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
*buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false);
memcpy(*buf_send,tmp,old_maxsend*sizeof(double));
delete [] tmp;
}
else
{
*buf_send = (double *)
memory->srealloc(*buf_send,(*maxsend+BUFEXTRA)*sizeof(double),
"comm:buf_send");
}
}
else {
if(cuda->pinned)
{
if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
*buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false);
}
else
{
memory->sfree(*buf_send);
*buf_send = (double *) memory->smalloc((*maxsend+BUFEXTRA)*sizeof(double),
"comm:buf_send");
}
}
}
// Grow the per-atom arrays through AtomVecCharge::grow(n) while keeping the
// device copy in step: once cuda->finished_setup is set, all data is
// downloaded before the host-side grow and, after a resize check, uploaded
// again afterwards.
void AtomVecChargeCuda::grow_both(int n)
{
  if (cuda->finished_setup) {
    cuda->downloadAll();
  }

  AtomVecCharge::grow(n);

  if (!cuda->finished_setup) return;
  cuda->checkResize();
  cuda->uploadAll();
}
// Pack forward-communication data into buf.
// Usually not called: comm->communicate handles the communication itself when
// only positions are exchanged.
// Before CUDA setup has finished, or while running on the CPU, the call is
// forwarded to AtomVecCharge::pack_comm. Otherwise Cuda_CommCuda_PackComm does
// the packing (it receives the value *iswap).
// Returns the number of buffer entries used; a non-zero count is rescaled by
// sizeof(X_FLOAT)/sizeof(double) when the two types differ in size.
int AtomVecChargeCuda::pack_comm(int n, int* iswap, double *buf,
                                 int pbc_flag, int *pbc)
{
  const bool host_path = !cuda->finished_setup || cuda->oncpu;
  if (host_path) {
    return AtomVecCharge::pack_comm(n, iswap, buf, pbc_flag, pbc);
  }

  int count = Cuda_CommCuda_PackComm(&cuda->shared_data, n, *iswap,
                                     (void*) buf, pbc, pbc_flag);
  if (count && (sizeof(X_FLOAT) != sizeof(double))) {
    count = (count + 1) * sizeof(X_FLOAT) / sizeof(double);
  }
  return count;
}
// Velocity-carrying counterpart of pack_comm.
// Usually not called: comm->communicate handles the communication itself when
// only positions are exchanged.
// Falls back to AtomVecCharge::pack_comm_vel before CUDA setup has finished or
// while running on the CPU; otherwise Cuda_CommCuda_PackCommVel does the
// packing (it receives the value *iswap).
// Returns the number of buffer entries used; a non-zero count is rescaled by
// sizeof(X_FLOAT)/sizeof(double) when the two types differ in size.
int AtomVecChargeCuda::pack_comm_vel(int n, int* iswap, double *buf,
                                     int pbc_flag, int *pbc)
{
  const bool host_path = !cuda->finished_setup || cuda->oncpu;
  if (host_path) {
    return AtomVecCharge::pack_comm_vel(n, iswap, buf, pbc_flag, pbc);
  }

  int count = Cuda_CommCuda_PackCommVel(&cuda->shared_data, n, *iswap,
                                        (void*) buf, pbc, pbc_flag);
  if (count && (sizeof(X_FLOAT) != sizeof(double))) {
    count = (count + 1) * sizeof(X_FLOAT) / sizeof(double);
  }
  return count;
}
/* ---------------------------------------------------------------------- */
// Unpack forward-communication data for n atoms starting at index first.
// Usually not called: comm->communicate handles the communication itself when
// only positions are exchanged.
// Uses Cuda_CommCuda_UnpackComm once CUDA setup has finished and the run is
// not on the CPU; in every other case the base class does the work.
void AtomVecChargeCuda::unpack_comm(int n, int first, double *buf)
{
  if (cuda->finished_setup && !cuda->oncpu) {
    Cuda_CommCuda_UnpackComm(&cuda->shared_data, n, first, (void*) buf);
  } else {
    AtomVecCharge::unpack_comm(n, first, buf);
  }
}
// Velocity-carrying counterpart of unpack_comm.
// Usually not called: comm->communicate handles the communication itself when
// only positions are exchanged.
// Uses Cuda_CommCuda_UnpackCommVel once CUDA setup has finished and the run is
// not on the CPU; in every other case the base class does the work.
void AtomVecChargeCuda::unpack_comm_vel(int n, int first, double *buf)
{
  if (cuda->finished_setup && !cuda->oncpu) {
    Cuda_CommCuda_UnpackCommVel(&cuda->shared_data, n, first, (void*) buf);
  } else {
    AtomVecCharge::unpack_comm_vel(n, first, buf);
  }
}
/* ---------------------------------------------------------------------- */
// Pack the forces of atoms [first, first+n) into buf for reverse communication.
// Usually not called: comm->communicate handles the communication itself when
// only forces are exchanged.
// Before CUDA setup has finished, or while running on the CPU, the base class
// implementation is used. On the device path the host force array is
// downloaded first and uploaded again after packing.
// Returns the number of doubles written (3 per atom).
int AtomVecChargeCuda::pack_reverse(int n, int first, double *buf)
{
  if (!cuda->finished_setup || cuda->oncpu) {
    return AtomVecCharge::pack_reverse(n, first, buf);
  }

  cuda->cu_f->download();

  int count = 0;
  const int end = first + n;
  for (int iatom = first; iatom < end; ++iatom) {
    for (int d = 0; d < 3; ++d) {
      buf[count++] = f[iatom][d];
    }
  }

  cuda->cu_f->upload();
  return count;
}
/* ---------------------------------------------------------------------- */
// Accumulate reverse-communicated forces from buf onto the atoms named in list.
// Usually not called: comm->communicate handles the communication itself when
// only forces are exchanged.
// Before CUDA setup has finished, or while running on the CPU, the base class
// implementation is used. On the device path the host force array is
// downloaded, updated and uploaded again.
void AtomVecChargeCuda::unpack_reverse(int n, int *list, double *buf)
{
  if (!cuda->finished_setup || cuda->oncpu) {
    AtomVecCharge::unpack_reverse(n, list, buf);
    return;
  }

  cuda->cu_f->download();

  int pos = 0;
  for (int k = 0; k < n; ++k) {
    const int iatom = list[k];
    for (int d = 0; d < 3; ++d) {
      f[iatom][d] += buf[pos++];
    }
  }

  cuda->cu_f->upload();
}
/* ---------------------------------------------------------------------- */
// Pack border-atom data into buf.
// Delegates to AtomVecCharge::pack_border before CUDA setup has finished or
// while running on the CPU; otherwise returns whatever
// Cuda_AtomVecChargeCuda_PackBorder reports (it is handed the value *iswap).
int AtomVecChargeCuda::pack_border(int n, int *iswap, double *buf,
                                   int pbc_flag, int *pbc)
{
  if (cuda->finished_setup && !cuda->oncpu) {
    return Cuda_AtomVecChargeCuda_PackBorder(&cuda->shared_data, n, *iswap,
                                             (void*) buf, pbc, pbc_flag);
  }
  return AtomVecCharge::pack_border(n, iswap, buf, pbc_flag, pbc);
}
// Velocity-carrying counterpart of pack_border.
// Delegates to AtomVecCharge::pack_border_vel before CUDA setup has finished
// or while running on the CPU; otherwise returns whatever
// Cuda_AtomVecChargeCuda_PackBorderVel reports (it is handed the value *iswap).
int AtomVecChargeCuda::pack_border_vel(int n, int *iswap, double *buf,
                                       int pbc_flag, int *pbc)
{
  if (cuda->finished_setup && !cuda->oncpu) {
    return Cuda_AtomVecChargeCuda_PackBorderVel(&cuda->shared_data, n, *iswap,
                                                (void*) buf, pbc, pbc_flag);
  }
  return AtomVecCharge::pack_border_vel(n, iswap, buf, pbc_flag, pbc);
}
/* ---------------------------------------------------------------------- */
// Unpack n border atoms starting at index first.
// Delegates to the base class before CUDA setup has finished or while running
// on the CPU. Otherwise the arrays are grown until there is enough space on
// the device to unpack the data, and Cuda_AtomVecChargeCuda_UnpackBorder does
// the unpacking. A non-zero result from that call is only reported on stdout.
void AtomVecChargeCuda::unpack_border(int n, int first, double *buf)
{
  if (!cuda->finished_setup || cuda->oncpu) {
    AtomVecCharge::unpack_border(n, first, buf);
    return;
  }

  while (atom->nghost + atom->nlocal + n >= cuda->shared_data.atom.nmax) {
    grow_both(0);
  }

  const int failed = Cuda_AtomVecChargeCuda_UnpackBorder(&cuda->shared_data, n,
                                                         first, (void*) buf);
  if (failed) {
    printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");
  }
}
// Velocity-carrying counterpart of unpack_border.
// Delegates to the base class before CUDA setup has finished or while running
// on the CPU. Otherwise the arrays are grown until there is enough space on
// the device to unpack the data, and Cuda_AtomVecChargeCuda_UnpackBorderVel
// does the unpacking. A non-zero result from that call is only reported on
// stdout.
void AtomVecChargeCuda::unpack_border_vel(int n, int first, double *buf)
{
  if (!cuda->finished_setup || cuda->oncpu) {
    AtomVecCharge::unpack_border_vel(n, first, buf);
    return;
  }

  while (atom->nghost + atom->nlocal + n >= cuda->shared_data.atom.nmax) {
    grow_both(0);
  }

  const int failed = Cuda_AtomVecChargeCuda_UnpackBorderVel(&cuda->shared_data, n,
                                                            first, (void*) buf);
  if (failed) {
    printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");
  }
}
/* ----------------------------------------------------------------------
pack data for atom I for sending to another proc
xyz must be 1st 3 values, so comm::exchange() can test on them
------------------------------------------------------------------------- */
// Pack all atoms leaving this proc along dimension dim into the send buffer.
// On the CPU path (cuda->oncpu) this simply forwards to the base class.
// On the device path buf is NOT a data buffer: it is reinterpreted as a
// double** (pointer to the send-buffer pointer) so the buffer can be
// reallocated here via grow_send().
// Buffer layout written on the device path, as far as this function shows:
//   [0]                 number of atoms being sent
//   [1..nsend_atoms]    first the index of each leaving atom, later
//                       overwritten with the amount of extra fix data per atom
//   [..m)               data written by Cuda_AtomVecChargeCuda_PackExchange,
//                       followed by the per-fix extra data
// Returns the number of buffer entries used, or 0 when only the count slot
// was written (m==1).
int AtomVecChargeCuda::pack_exchange(int dim, double *buf)
{
if(cuda->oncpu)
return AtomVecCharge::pack_exchange(dim,buf);
// (re)initialise the device side on first use and whenever the box changes
if(not cuda_init_done||domain->box_change)
{
Cuda_AtomVecChargeCuda_Init(&cuda->shared_data);
cuda_init_done=true;
}
// buf really carries the address of the send-buffer pointer
double** buf_pointer=(double**) buf;
// make sure a buffer exists and that maxsend is at least nghost
if(*maxsend<atom->nghost || *buf_pointer==NULL)
{
grow_send(atom->nghost>*maxsend?atom->nghost:*maxsend,buf_pointer,0);
*maxsend=atom->nghost>*maxsend?atom->nghost:*maxsend;
}
// first call: allocate the copy lists with an initial capacity of 200
if(max_nsend==0) grow_copylist(200);
// build the list of leaving atoms; the return value is their count
int nsend_atoms = Cuda_AtomVecChargeCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100);
// buffer too small for NCUDAEXCHANGE values per atom: regrow (contents are
// discarded, flag 0) and build the list again into the new buffer
if(nsend_atoms*NCUDAEXCHANGE>*maxsend)
{
grow_send((int) (nsend_atoms+100)*NCUDAEXCHANGE,buf_pointer,0);
Cuda_AtomVecChargeCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
}
// number of local atoms that remain after the exchange
int nlocal=atom->nlocal-nsend_atoms;
// copylist2[k] describes slot nlocal+k at the tail of the local range:
// 1 = not in the send list, -1 = that atom is itself leaving
for(int i=0;i<nsend_atoms;i++) copylist2[i]=1;
for(int j=1;j<nsend_atoms+1;j++)
{
int i = static_cast <int> ((*buf_pointer)[j]);
if(i>=nlocal) copylist2[i-nlocal]=-1;
}
// for every leaving atom with index below nlocal, record in copylist the next
// tail slot that is not leaving
// NOTE(review): presumably the device kernel moves that tail atom into the
// vacated slot - confirm in Cuda_AtomVecChargeCuda_PackExchange
int actpos=0;
for(int j=1;j<nsend_atoms+1;j++)
{
int i = static_cast <int> ((*buf_pointer)[j]);
if(i<nlocal)
{
while(copylist2[actpos]==-1) actpos++;
copylist[j-1]=nlocal+actpos;
actpos++;
}
}
cu_copylist->upload();
cuda->shared_data.atom.nlocal=nlocal;
// pack the per-atom data; m is the buffer position reached so far
int m = Cuda_AtomVecChargeCuda_PackExchange(&cuda->shared_data,nsend_atoms,*buf_pointer,cu_copylist->dev_data());
// append the data of fixes that store per-atom state
if (atom->nextra_grow)
for(int j=0;j<nsend_atoms;j++)
{
int i=static_cast <int> ((*buf_pointer)[j+1]);
int nextra=0;
for (int iextra = 0; iextra < atom->nextra_grow; iextra++) {
int dm = modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&((*buf_pointer)[m]));
m+=dm;
nextra+=dm;
// keep the fix arrays consistent with the slot recorded in copylist
if(i<nlocal)modify->fix[atom->extra_grow[iextra]]->copy_arrays(copylist[j],i);
// grow with flag 1 so the data packed so far is preserved; *buf_pointer may
// change here, which is why it is dereferenced afresh on every access
if(m>*maxsend) grow_send(m,buf_pointer,1);
}
// slot j+1 now holds the amount of extra data instead of the atom index
(*buf_pointer)[j+1] = nextra;
}
(*buf_pointer)[0] = nsend_atoms;
atom->nlocal-=nsend_atoms;
// NOTE(review): the meaning of the value 2 is defined outside this file
cuda->shared_data.atom.update_nlocal=2;
if(m==1) return 0;//m is at least 1 in cuda since buf[0] contains number of atoms
return m;
}
/* ---------------------------------------------------------------------- */
// Unpack the atoms received during an exchange.
// On the CPU path (cuda->oncpu) this forwards to the base class. On the device
// path buf holds one message, or two back-to-back messages when
// comm->procgrid[dim] > 2, where dim is cuda->shared_data.exchange_dim.
// Each message starts with the number of atoms it carries (buf[0]); the
// per-atom block of NCUDAEXCHANGE values per atom is followed by the extra
// data of fixes that store per-atom state.
// Returns the total number of buffer entries consumed.
int AtomVecChargeCuda::unpack_exchange(double *buf)
{
  if(cuda->oncpu)
    return AtomVecCharge::unpack_exchange(buf);

  int dim=cuda->shared_data.exchange_dim;
  if(domain->box_change)
    Cuda_AtomVecChargeCuda_Init(&cuda->shared_data);

  int mfirst=0;
  for(int pi=0;pi<(comm->procgrid[dim]>2?2:1);pi++)
  {
    int nlocal = atom->nlocal;
    int nsend_atoms=static_cast<int> (buf[0]);

    // make room for the copy list and for the incoming atoms
    if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100);
    if (nlocal+nsend_atoms+atom->nghost>=atom->nmax)
      grow_both(nlocal+nsend_atoms*2+atom->nghost);

    // the device call fills copylist; entries > -1 are used below as the
    // local index of an accepted atom
    int naccept = Cuda_AtomVecChargeCuda_UnpackExchange(&cuda->shared_data,nsend_atoms,buf,cu_copylist->dev_data());
    cu_copylist->download();

    // position of the first extra-data entry of this message
    int m = nsend_atoms*NCUDAEXCHANGE + 1;
    nlocal+=naccept;

    if (atom->nextra_grow)
      for(int j=0;j<nsend_atoms;j++)
      {
        if(copylist[j]>-1)
        {
          for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
            m += modify->fix[atom->extra_grow[iextra]]->
              unpack_exchange(copylist[j],&buf[m]);
        }
        else
        {
          // atom not accepted: skip its extra data, whose length is in buf[j+1]
          m+=static_cast <int> (buf[j+1]);
        }
      }

    cuda->shared_data.atom.nlocal=nlocal;
    cuda->shared_data.atom.update_nlocal=2;
    atom->nlocal=nlocal;

    // advance to the next message, if any
    mfirst+=m;
    buf=&buf[m];
  }
  return mfirst;
}

View File

@ -0,0 +1,69 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef ATOM_CLASS
AtomStyle(charge/cuda,AtomVecChargeCuda)
#else
#ifndef LMP_ATOM_VEC_CHARGE_CUDA_H
#define LMP_ATOM_VEC_CHARGE_CUDA_H
#include "atom_vec_charge.h"
#include "cuda_data.h"
namespace LAMMPS_NS {

// Atom style class derived from AtomVecCharge that redeclares the
// communication, border and exchange pack/unpack entry points and adds
// helpers for growing the buffers they use.
class AtomVecChargeCuda : public AtomVecCharge {
 public:
  AtomVecChargeCuda(class LAMMPS *lmp, int narg, char **arg);
  virtual ~AtomVecChargeCuda() {}

  // buffer / array growth helpers
  void grow_copylist(int n);
  void grow_send(int n, double** buf_send, int flag);
  void grow_both(int n);

  // forward communication
  int pack_comm(int n, int *iswap, double *buf, int pbc_flag, int *pbc);
  int pack_comm_vel(int n, int *iswap, double *buf, int pbc_flag, int *pbc);
  void unpack_comm(int n, int first, double *buf);
  void unpack_comm_vel(int n, int first, double *buf);

  // reverse communication
  int pack_reverse(int n, int first, double *buf);
  void unpack_reverse(int n, int *list, double *buf);

  // border atoms
  int pack_border(int n, int *iswap, double *buf, int pbc_flag, int *pbc);
  int pack_border_vel(int n, int *iswap, double *buf, int pbc_flag, int *pbc);
  void unpack_border(int n, int first, double *buf);
  void unpack_border_vel(int n, int first, double *buf);

  // atom exchange between procs
  int pack_exchange(int dim, double *buf);
  int unpack_exchange(double *buf);

 private:
  class Cuda *cuda;                       // taken from the LAMMPS instance
  bool cuda_init_done;                    // set after the first device init in pack_exchange
  int* copylist;                          // host side of cu_copylist
  int* copylist2;                         // host-only scratch list
  cCudaData<int, int, xx >* cu_copylist;  // device wrapper around copylist
  int max_nsend;                          // capacity of the copy lists
};

}
#endif
#endif

View File

@ -0,0 +1,516 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include "atom_vec_full_cuda.h"
#include "comm_cuda_cu.h"
#include "atom_vec_full_cuda_cu.h"
#include "atom.h"
#include "domain.h"
#include "modify.h"
#include "fix.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
#include "universe.h"
#include "comm.h"
using namespace LAMMPS_NS;
#define DELTA 10000
#define BUFFACTOR 1.5
#define BUFEXTRA 1000
#define NCUDAEXCHANGE 13 //nextra x y z vx vy vz tag type mask image q molecule
#define BUF_FLOAT double
/* ---------------------------------------------------------------------- */
// Construct the full/cuda atom style.
// Takes the Cuda object from the LAMMPS instance and aborts through
// error->all() when it is missing. The copy lists start out unallocated
// (max_nsend == 0).
AtomVecFullCuda::AtomVecFullCuda(LAMMPS *lmp, int narg, char **arg) :
  AtomVecFull(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (cuda == NULL) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  // state flags
  maxsend = 0;
  cudable = true;
  cuda_init_done = false;

  // copy lists are allocated lazily
  max_nsend = 0;
  copylist = NULL;
  copylist2 = NULL;
  cu_copylist = NULL;
}
void AtomVecFullCuda::grow_copylist(int new_max_nsend)
{
max_nsend=new_max_nsend;
delete cu_copylist;
delete [] copylist2;
if(copylist) CudaWrapper_FreePinnedHostData((void*) copylist);
copylist = (int*) CudaWrapper_AllocPinnedHostData(max_nsend*sizeof(int),false);
copylist2 = new int[max_nsend];
cu_copylist = new cCudaData<int, int, xx > (copylist, max_nsend);
}
void AtomVecFullCuda::grow_send(int n,double** buf_send,int flag) //need to be able to grow the comm send_buffer since the array sahll be copied from the gpu in whole
{
int old_maxsend=*maxsend+BUFEXTRA;
*maxsend = static_cast<int> (BUFFACTOR * n);
if (flag)
{
if(cuda->pinned)
{
double* tmp = new double[old_maxsend];
memcpy((void*) tmp,(void*) *buf_send,old_maxsend*sizeof(double));
if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
*buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false);
memcpy(*buf_send,tmp,old_maxsend*sizeof(double));
delete [] tmp;
}
else
{
*buf_send = (double *)
memory->srealloc(*buf_send,(*maxsend+BUFEXTRA)*sizeof(double),
"comm:buf_send");
}
}
else {
if(cuda->pinned)
{
if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
*buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false);
}
else
{
memory->sfree(*buf_send);
*buf_send = (double *) memory->smalloc((*maxsend+BUFEXTRA)*sizeof(double),
"comm:buf_send");
}
}
}
// Grow the per-atom arrays through AtomVecFull::grow(n) while keeping the
// device copy in step: once cuda->finished_setup is set, all data is
// downloaded before the host-side grow and, after a resize check, uploaded
// again afterwards.
void AtomVecFullCuda::grow_both(int n)
{
  if (cuda->finished_setup) {
    cuda->downloadAll();
  }

  AtomVecFull::grow(n);

  if (!cuda->finished_setup) return;
  cuda->checkResize();
  cuda->uploadAll();
}
// Pack forward-communication data into buf.
// Usually not called: comm->communicate handles the communication itself when
// only positions are exchanged.
// Before CUDA setup has finished, or while running on the CPU, the call is
// forwarded to AtomVecFull::pack_comm. Otherwise Cuda_CommCuda_PackComm does
// the packing (it receives the value *iswap).
// Returns the number of buffer entries used; a non-zero count is rescaled by
// sizeof(X_FLOAT)/sizeof(double) when the two types differ in size.
int AtomVecFullCuda::pack_comm(int n, int* iswap, double *buf,
                               int pbc_flag, int *pbc)
{
  const bool host_path = !cuda->finished_setup || cuda->oncpu;
  if (host_path) {
    return AtomVecFull::pack_comm(n, iswap, buf, pbc_flag, pbc);
  }

  int count = Cuda_CommCuda_PackComm(&cuda->shared_data, n, *iswap,
                                     (void*) buf, pbc, pbc_flag);
  if (count && (sizeof(X_FLOAT) != sizeof(double))) {
    count = (count + 1) * sizeof(X_FLOAT) / sizeof(double);
  }
  return count;
}
// Velocity-carrying counterpart of pack_comm.
// Usually not called: comm->communicate handles the communication itself when
// only positions are exchanged.
// Falls back to AtomVecFull::pack_comm_vel before CUDA setup has finished or
// while running on the CPU; otherwise Cuda_CommCuda_PackCommVel does the
// packing (it receives the value *iswap).
// Returns the number of buffer entries used; a non-zero count is rescaled by
// sizeof(X_FLOAT)/sizeof(double) when the two types differ in size.
int AtomVecFullCuda::pack_comm_vel(int n, int* iswap, double *buf,
                                   int pbc_flag, int *pbc)
{
  const bool host_path = !cuda->finished_setup || cuda->oncpu;
  if (host_path) {
    return AtomVecFull::pack_comm_vel(n, iswap, buf, pbc_flag, pbc);
  }

  int count = Cuda_CommCuda_PackCommVel(&cuda->shared_data, n, *iswap,
                                        (void*) buf, pbc, pbc_flag);
  if (count && (sizeof(X_FLOAT) != sizeof(double))) {
    count = (count + 1) * sizeof(X_FLOAT) / sizeof(double);
  }
  return count;
}
/* ---------------------------------------------------------------------- */
// Unpack forward-communication data for n atoms starting at index first.
// Usually not called: comm->communicate handles the communication itself when
// only positions are exchanged.
// Uses Cuda_CommCuda_UnpackComm once CUDA setup has finished and the run is
// not on the CPU; in every other case the base class does the work.
void AtomVecFullCuda::unpack_comm(int n, int first, double *buf)
{
  if (cuda->finished_setup && !cuda->oncpu) {
    Cuda_CommCuda_UnpackComm(&cuda->shared_data, n, first, (void*) buf);
  } else {
    AtomVecFull::unpack_comm(n, first, buf);
  }
}
// Velocity-carrying counterpart of unpack_comm.
// Usually not called: comm->communicate handles the communication itself when
// only positions are exchanged.
// Uses Cuda_CommCuda_UnpackCommVel once CUDA setup has finished and the run is
// not on the CPU; in every other case the base class does the work.
void AtomVecFullCuda::unpack_comm_vel(int n, int first, double *buf)
{
  if (cuda->finished_setup && !cuda->oncpu) {
    Cuda_CommCuda_UnpackCommVel(&cuda->shared_data, n, first, (void*) buf);
  } else {
    AtomVecFull::unpack_comm_vel(n, first, buf);
  }
}
/* ---------------------------------------------------------------------- */
// Pack the forces of atoms [first, first+n) into buf for reverse communication.
// Usually not called: comm->communicate handles the communication itself when
// only forces are exchanged.
// Before CUDA setup has finished, or while running on the CPU, the base class
// implementation is used. On the device path the host force array is
// downloaded first and uploaded again after packing.
// Returns the number of doubles written (3 per atom).
int AtomVecFullCuda::pack_reverse(int n, int first, double *buf)
{
  if (!cuda->finished_setup || cuda->oncpu) {
    return AtomVecFull::pack_reverse(n, first, buf);
  }

  cuda->cu_f->download();

  int count = 0;
  const int end = first + n;
  for (int iatom = first; iatom < end; ++iatom) {
    for (int d = 0; d < 3; ++d) {
      buf[count++] = f[iatom][d];
    }
  }

  cuda->cu_f->upload();
  return count;
}
/* ---------------------------------------------------------------------- */
// Accumulate reverse-communicated forces from buf onto the atoms named in list.
// Usually not called: comm->communicate handles the communication itself when
// only forces are exchanged.
// Before CUDA setup has finished, or while running on the CPU, the base class
// implementation is used. On the device path the host force array is
// downloaded, updated and uploaded again.
void AtomVecFullCuda::unpack_reverse(int n, int *list, double *buf)
{
  if (!cuda->finished_setup || cuda->oncpu) {
    AtomVecFull::unpack_reverse(n, list, buf);
    return;
  }

  cuda->cu_f->download();

  int pos = 0;
  for (int k = 0; k < n; ++k) {
    const int iatom = list[k];
    for (int d = 0; d < 3; ++d) {
      f[iatom][d] += buf[pos++];
    }
  }

  cuda->cu_f->upload();
}
/* ---------------------------------------------------------------------- */
// Pack border-atom data into buf.
// Delegates to AtomVecFull::pack_border before CUDA setup has finished or
// while running on the CPU; otherwise returns whatever
// Cuda_AtomVecFullCuda_PackBorder reports (it is handed the value *iswap).
int AtomVecFullCuda::pack_border(int n, int *iswap, double *buf,
                                 int pbc_flag, int *pbc)
{
  if (cuda->finished_setup && !cuda->oncpu) {
    return Cuda_AtomVecFullCuda_PackBorder(&cuda->shared_data, n, *iswap,
                                           (void*) buf, pbc, pbc_flag);
  }
  return AtomVecFull::pack_border(n, iswap, buf, pbc_flag, pbc);
}
// Velocity-carrying counterpart of pack_border.
// Delegates to AtomVecFull::pack_border_vel before CUDA setup has finished or
// while running on the CPU; otherwise returns whatever
// Cuda_AtomVecFullCuda_PackBorderVel reports (it is handed the value *iswap).
int AtomVecFullCuda::pack_border_vel(int n, int *iswap, double *buf,
                                     int pbc_flag, int *pbc)
{
  if (cuda->finished_setup && !cuda->oncpu) {
    return Cuda_AtomVecFullCuda_PackBorderVel(&cuda->shared_data, n, *iswap,
                                              (void*) buf, pbc, pbc_flag);
  }
  return AtomVecFull::pack_border_vel(n, iswap, buf, pbc_flag, pbc);
}
/* ---------------------------------------------------------------------- */
// Unpack n border atoms starting at index first.
// Delegates to the base class before CUDA setup has finished or while running
// on the CPU. Otherwise the arrays are grown until there is enough space on
// the device to unpack the data, and Cuda_AtomVecFullCuda_UnpackBorder does
// the unpacking. A non-zero result from that call is only reported on stdout.
void AtomVecFullCuda::unpack_border(int n, int first, double *buf)
{
  if (!cuda->finished_setup || cuda->oncpu) {
    AtomVecFull::unpack_border(n, first, buf);
    return;
  }

  while (atom->nghost + atom->nlocal + n >= cuda->shared_data.atom.nmax) {
    grow_both(0);
  }

  const int failed = Cuda_AtomVecFullCuda_UnpackBorder(&cuda->shared_data, n,
                                                       first, (void*) buf);
  if (failed) {
    printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");
  }
}
// Velocity-carrying counterpart of unpack_border.
// Delegates to the base class before CUDA setup has finished or while running
// on the CPU. Otherwise the arrays are grown until there is enough space on
// the device to unpack the data, and Cuda_AtomVecFullCuda_UnpackBorderVel does
// the unpacking. A non-zero result from that call is only reported on stdout.
void AtomVecFullCuda::unpack_border_vel(int n, int first, double *buf)
{
  if (!cuda->finished_setup || cuda->oncpu) {
    AtomVecFull::unpack_border_vel(n, first, buf);
    return;
  }

  while (atom->nghost + atom->nlocal + n >= cuda->shared_data.atom.nmax) {
    grow_both(0);
  }

  const int failed = Cuda_AtomVecFullCuda_UnpackBorderVel(&cuda->shared_data, n,
                                                          first, (void*) buf);
  if (failed) {
    printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");
  }
}
/* ----------------------------------------------------------------------
pack data for atom I for sending to another proc
xyz must be 1st 3 values, so comm::exchange() can test on them
------------------------------------------------------------------------- */
// Pack all atoms that leave this processor along dimension `dim`.
// When cuda->oncpu is set the AtomVecFull host implementation is used.
// On the device path `buf` is NOT treated as the data buffer itself: it is
// reinterpreted as a double** (address of the send-buffer pointer) so that
// the buffer can be regrown through grow_send().
// Buffer layout as written by this function:
//   [0]               number of atoms sent (nsend_atoms)
//   [1..nsend_atoms]  first the local index of each leaving atom (filled by
//                     Cuda_AtomVecFullCuda_PackExchangeList), later overwritten
//                     with the count of extra values packed for that atom
//   [m...]            starting at the offset returned by
//                     Cuda_AtomVecFullCuda_PackExchange: per-atom bond, angle,
//                     dihedral, improper, special and fix data packed on the host
// Returns the number of values in the buffer, or 0 when m==1.
int AtomVecFullCuda::pack_exchange(int dim, double *buf)
{
if(cuda->oncpu)
return AtomVecFull::pack_exchange(dim,buf);
// (re)initialize the device side on first use and whenever the box changes
if(not cuda_init_done||domain->box_change)
{
Cuda_AtomVecFullCuda_Init(&cuda->shared_data);
cuda_init_done=true;
}
double** buf_pointer=(double**) buf;
// make sure a send buffer exists and *maxsend is at least atom->nghost
if(*maxsend<atom->nghost || *buf_pointer==NULL)
{
grow_send(atom->nghost>*maxsend?atom->nghost:*maxsend,buf_pointer,0);
*maxsend=atom->nghost>*maxsend?atom->nghost:*maxsend;
}
if(max_nsend==0) grow_copylist(200);
// first pass: returns the number of leaving atoms; their indices are read
// back below from buffer entries 1..nsend_atoms
int nsend_atoms = Cuda_AtomVecFullCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100);
// buffer too small for the fixed-size part: grow it and build the list again
if(nsend_atoms*NCUDAEXCHANGE>*maxsend)
{
grow_send((int) (nsend_atoms+100)*NCUDAEXCHANGE,buf_pointer,0);
Cuda_AtomVecFullCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
}
// number of local atoms that remain after the exchange
int nlocal=atom->nlocal-nsend_atoms;
// copylist2[k] describes slot nlocal+k of the old local range:
// 1 = atom stays (can be moved into a hole), -1 = atom itself is leaving
for(int i=0;i<nsend_atoms;i++) copylist2[i]=1;
for(int j=1;j<nsend_atoms+1;j++)
{
int i = static_cast <int> ((*buf_pointer)[j]);
if(i>=nlocal) copylist2[i-nlocal]=-1;
}
// for every leaving atom with index < nlocal pick the next staying atom from
// the tail slots; copylist[j-1] holds the index it will be copied from
int actpos=0;
for(int j=1;j<nsend_atoms+1;j++)
{
int i = static_cast <int> ((*buf_pointer)[j]);
if(i<nlocal)
{
while(copylist2[actpos]==-1) actpos++;
copylist[j-1]=nlocal+actpos;
actpos++;
}
}
cu_copylist->upload();
cuda->shared_data.atom.nlocal=nlocal;
// m is the buffer offset at which the host-side packing below continues
int m = Cuda_AtomVecFullCuda_PackExchange(&cuda->shared_data,nsend_atoms,*buf_pointer,cu_copylist->dev_data());
// wall-clock timing of the host-side packing loop
timespec time1,time2;
clock_gettime(CLOCK_REALTIME,&time1);
double* buf_p=*buf_pointer;
for(int j=0;j<nsend_atoms;j++)
{
// i = local index of the j-th leaving atom; nextra counts the values
// appended for this atom by the host code below
int i=static_cast <int> (buf_p[j+1]);
int nextra=0;
int k;
buf_p[m++] = num_bond[i];
for (k = 0; k < num_bond[i]; k++) {
buf_p[m++] = bond_type[i][k];
buf_p[m++] = bond_atom[i][k];
}
nextra+=2*num_bond[i]+1;
// NOTE(review): the capacity check runs after the values were written;
// presumably grow_send keeps enough headroom -- confirm
if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
buf_p[m++] = num_angle[i];
for (k = 0; k < num_angle[i]; k++) {
buf_p[m++] = angle_type[i][k];
buf_p[m++] = angle_atom1[i][k];
buf_p[m++] = angle_atom2[i][k];
buf_p[m++] = angle_atom3[i][k];
}
nextra+=4*num_angle[i]+1;
if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
buf_p[m++] = num_dihedral[i];
for (k = 0; k < num_dihedral[i]; k++) {
buf_p[m++] = dihedral_type[i][k];
buf_p[m++] = dihedral_atom1[i][k];
buf_p[m++] = dihedral_atom2[i][k];
buf_p[m++] = dihedral_atom3[i][k];
buf_p[m++] = dihedral_atom4[i][k];
}
nextra+=5*num_dihedral[i]+1;
if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
buf_p[m++] = num_improper[i];
for (k = 0; k < num_improper[i]; k++) {
buf_p[m++] = improper_type[i][k];
buf_p[m++] = improper_atom1[i][k];
buf_p[m++] = improper_atom2[i][k];
buf_p[m++] = improper_atom3[i][k];
buf_p[m++] = improper_atom4[i][k];
}
nextra+=5*num_improper[i]+1;
if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
buf_p[m++] = nspecial[i][0];
buf_p[m++] = nspecial[i][1];
buf_p[m++] = nspecial[i][2];
for (k = 0; k < nspecial[i][2]; k++) buf_p[m++] = special[i][k];
nextra+=nspecial[i][2]+3;
if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
// let every fix listed in atom->extra_grow append its per-atom data; if the
// atom's slot is inside the remaining local range, the fix arrays of the
// replacement atom (copylist[j]) are copied into it
if (atom->nextra_grow)
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
{
int dm= modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf_p[m]);
m+=dm;
nextra+=dm;
if(i<nlocal)modify->fix[atom->extra_grow[iextra]]->copy_arrays(copylist[j],i);
if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
}
// fill the hole left by atom i with the host data of atom copylist[j]
if(i<nlocal)AtomVecFull::copy(copylist[j],i,1);
// the index slot of this atom now carries its extra-value count
(*buf_pointer)[j+1] = nextra;
}
clock_gettime(CLOCK_REALTIME,&time2);
cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+=
time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
(*buf_pointer)[0] = nsend_atoms;
atom->nlocal-=nsend_atoms;
cuda->shared_data.atom.update_nlocal=2;
//printf("End Pack Exchange\n");
if(m==1) return 0;
return m;
}
/* ---------------------------------------------------------------------- */
// Unpack atoms received during exchange from buf.
// When cuda->oncpu is set the AtomVecFull host implementation is used.
// Otherwise one message (two when comm->procgrid[dim] > 2) is processed:
// Cuda_AtomVecFullCuda_UnpackExchange returns the number of accepted atoms
// and fills the copylist, after which the host unpacks the bond, angle,
// dihedral, improper, special and fix data of each accepted atom.
// Returns the total number of buffer values consumed over all messages.
int AtomVecFullCuda::unpack_exchange(double *buf)
{
// printf("Begin UnPack Exchange\n");
if(cuda->oncpu)
return AtomVecFull::unpack_exchange(buf);
double *sublo,*subhi;
int dim=cuda->shared_data.exchange_dim;
if(domain->box_change)
Cuda_AtomVecFullCuda_Init(&cuda->shared_data);
// NOTE(review): sublo/subhi are assigned here but not read again in this function
if (domain->triclinic == 0) {
sublo = domain->sublo;
subhi = domain->subhi;
} else {
sublo = domain->sublo_lamda;
subhi = domain->subhi_lamda;
}
// mfirst accumulates the number of values consumed over all messages
int mfirst=0;
for(int pi=0;pi<(comm->procgrid[dim]>2?2:1);pi++)
{
int nlocal = atom->nlocal;
// buf[0] holds the number of atoms contained in this message
int nsend_atoms=static_cast<int> (buf[0]);
if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100);
if (nlocal+nsend_atoms+atom->nghost>=atom->nmax) grow_both(nlocal+nsend_atoms*2+atom->nghost); //ensure there is enough space on device to unpack data
int naccept = Cuda_AtomVecFullCuda_UnpackExchange(&cuda->shared_data,nsend_atoms,buf,cu_copylist->dev_data());
cu_copylist->download();
// host-packed extra data starts after the header entry and the
// NCUDAEXCHANGE values per atom
int m = nsend_atoms*NCUDAEXCHANGE + 1;
nlocal+=naccept;
// wall-clock timing of the host-side unpacking loop
timespec time1,time2;
clock_gettime(CLOCK_REALTIME,&time1);
for(int j=0;j<nsend_atoms;j++)
{
// copylist[j] > -1: local index the j-th atom is stored at; otherwise the
// atom is skipped and its extra data is stepped over (else branch below)
if(copylist[j]>-1)
{
int k;
int i=copylist[j];
num_bond[i] = static_cast<int> (buf[m++]);
for (k = 0; k < num_bond[i]; k++) {
bond_type[i][k] = static_cast<int> (buf[m++]);
bond_atom[i][k] = static_cast<int> (buf[m++]);
}
num_angle[i] = static_cast<int> (buf[m++]);
for (k = 0; k < num_angle[i]; k++) {
angle_type[i][k] = static_cast<int> (buf[m++]);
angle_atom1[i][k] = static_cast<int> (buf[m++]);
angle_atom2[i][k] = static_cast<int> (buf[m++]);
angle_atom3[i][k] = static_cast<int> (buf[m++]);
}
num_dihedral[i] = static_cast<int> (buf[m++]);
for (k = 0; k < num_dihedral[i]; k++) {
dihedral_type[i][k] = static_cast<int> (buf[m++]);
dihedral_atom1[i][k] = static_cast<int> (buf[m++]);
dihedral_atom2[i][k] = static_cast<int> (buf[m++]);
dihedral_atom3[i][k] = static_cast<int> (buf[m++]);
dihedral_atom4[i][k] = static_cast<int> (buf[m++]);
}
num_improper[i] = static_cast<int> (buf[m++]);
for (k = 0; k < num_improper[i]; k++) {
improper_type[i][k] = static_cast<int> (buf[m++]);
improper_atom1[i][k] = static_cast<int> (buf[m++]);
improper_atom2[i][k] = static_cast<int> (buf[m++]);
improper_atom3[i][k] = static_cast<int> (buf[m++]);
improper_atom4[i][k] = static_cast<int> (buf[m++]);
}
nspecial[i][0] = static_cast<int> (buf[m++]);
nspecial[i][1] = static_cast<int> (buf[m++]);
nspecial[i][2] = static_cast<int> (buf[m++]);
for (k = 0; k < nspecial[i][2]; k++)
special[i][k] = static_cast<int> (buf[m++]);
// every fix listed in atom->extra_grow consumes its own per-atom data
if (atom->nextra_grow)
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
m += modify->fix[atom->extra_grow[iextra]]->
unpack_exchange(i,&buf[m]);
}
else
// buf[j+1] holds the number of extra values packed for atom j
m+=static_cast <int> (buf[j+1]);
}
clock_gettime(CLOCK_REALTIME,&time2);
cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+=
time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
cuda->shared_data.atom.nlocal=nlocal;
cuda->shared_data.atom.update_nlocal=2;
atom->nlocal=nlocal;
// advance to the next message, which starts right after this one
mfirst+=m;
buf=&buf[m];
}
return mfirst;
}

View File

@ -0,0 +1,69 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef ATOM_CLASS
AtomStyle(full/cuda,AtomVecFullCuda)
#else
#ifndef LMP_ATOM_VEC_FULL_CUDA_H
#define LMP_ATOM_VEC_FULL_CUDA_H
#include "atom_vec_full.h"
#include "cuda_data.h"
namespace LAMMPS_NS {
// Atom style "full/cuda": derives from AtomVecFull and declares CUDA-aware
// versions of the communication pack/unpack routines.
class AtomVecFullCuda : public AtomVecFull {
public:
AtomVecFullCuda(class LAMMPS *, int, char **);
virtual ~AtomVecFullCuda() {}
// buffer / array growth helpers
void grow_copylist(int n);
void grow_send(int n,double** buf_send,int flag);
void grow_both(int n);
// forward communication
int pack_comm(int, int *, double *, int, int *);
int pack_comm_vel(int, int *, double *, int, int *);
void unpack_comm(int, int, double *);
void unpack_comm_vel(int, int, double *);
// reverse communication
int pack_reverse(int, int, double *);
void unpack_reverse(int, int *, double *);
// border communication
int pack_border(int, int *, double *, int, int *);
int pack_border_vel(int, int *, double *, int, int *);
void unpack_border(int, int, double *);
void unpack_border_vel(int, int, double *);
// atom exchange between processors
int pack_exchange(int, double *);
int unpack_exchange(double *);
private:
class Cuda *cuda;
// set to true once the device-side init has been run by pack_exchange
bool cuda_init_done;
// index lists used during exchange
// NOTE(review): cu_copylist is presumably the device mirror of copylist -- confirm in the constructor / grow_copylist
int* copylist;
int* copylist2;
cCudaData<int, int, xx >* cu_copylist;
// NOTE(review): presumably the current capacity of the copylists; it is compared against the send count before grow_copylist is called
int max_nsend;
};
}
#endif
#endif

View File

@ -55,6 +55,8 @@ enum{SINGLE,MULTI};
CommCuda::CommCuda(LAMMPS *lmp):Comm(lmp)
{
cuda = lmp->cuda;
if(cuda == NULL)
error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
cu_pbc=NULL;
cu_slablo=NULL;

483
src/USER-CUDA/comm_cuda.cu Normal file
View File

@ -0,0 +1,483 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include <stdio.h>
#define MY_PREFIX comm_cuda
#include "cuda_shared.h"
#include "cuda_common.h"
#include "crm_cuda_utils.cu"
#include "comm_cuda_cu.h"
#include "comm_cuda_kernel.cu"
#include <ctime>
// Make sure the shared device scratch buffer (sdata->buffer) can hold
// n*3 X_FLOAT values and publish its address to this module's device-side
// `buffer` symbol.
//   sdata : shared CUDA state; buffer, buffersize and buffer_new are updated
//           when a reallocation takes place
//   n     : element count; the required size is n*3*sizeof(X_FLOAT) bytes
// The symbol is refreshed on every call, even if no reallocation happened.
void Cuda_CommCuda_UpdateBuffer(cuda_shared_data* sdata,int n)
{
  const int size = n*3*sizeof(X_FLOAT);
  if(sdata->buffersize < size)
  {
    // Debug output: buffersize is a byte count and this is the comm module
    // (the previous text named ComputeTempCuda and claimed kB).
    MYDBG(printf("Cuda_CommCuda Resizing Buffer at %p with %i bytes to\n",sdata->buffer,sdata->buffersize);)
    CudaWrapper_FreeCudaData(sdata->buffer,sdata->buffersize);
    sdata->buffer = CudaWrapper_AllocCudaData(size);
    sdata->buffersize = size;
    // signal to users of the shared buffer that its address changed
    sdata->buffer_new++;
    MYDBG(printf("New buffer at %p with %i bytes\n",sdata->buffer,sdata->buffersize);)
  }
  cudaMemcpyToSymbol(MY_CONST(buffer), & sdata->buffer, sizeof(int*) );
}
// Refresh this module's device symbols that depend on the atom array
// allocation: the nlocal and nmax counters and the device addresses of the
// x, v, f and type arrays, all taken from sdata->atom.
void Cuda_CommCuda_UpdateNmax(cuda_shared_data* sdata)
{
  // scalar counters
  cudaMemcpyToSymbol(MY_CONST(nlocal), &sdata->atom.nlocal, sizeof(int));
  cudaMemcpyToSymbol(MY_CONST(nmax),   &sdata->atom.nmax,   sizeof(int));

  // device array addresses
  cudaMemcpyToSymbol(MY_CONST(x),    &sdata->atom.x.dev_data,    sizeof(X_FLOAT*));
  cudaMemcpyToSymbol(MY_CONST(v),    &sdata->atom.v.dev_data,    sizeof(X_FLOAT*));
  cudaMemcpyToSymbol(MY_CONST(f),    &sdata->atom.f.dev_data,    sizeof(F_FLOAT*));
  cudaMemcpyToSymbol(MY_CONST(type), &sdata->atom.type.dev_data, sizeof(int*));
}
// One-time (or box-change) initialisation of this module's device symbols:
// first everything handled by Cuda_CommCuda_UpdateNmax, then the type count
// (+1), the box lengths prd[3] and the flag / debugdata pointers.
void Cuda_CommCuda_Init(cuda_shared_data* sdata)
{
  Cuda_CommCuda_UpdateNmax(sdata);

  // the device side receives ntypes+1
  int ntypes_plus_one = sdata->atom.ntypes + 1;
  cudaMemcpyToSymbol(MY_CONST(cuda_ntypes), &ntypes_plus_one, sizeof(int));

  cudaMemcpyToSymbol(MY_CONST(prd),       sdata->domain.prd, 3*sizeof(X_FLOAT));
  cudaMemcpyToSymbol(MY_CONST(flag),      &sdata->flag,      sizeof(int*));
  cudaMemcpyToSymbol(MY_CONST(debugdata), &sdata->debugdata, sizeof(int*));
}
// Forward communication: run the pack kernel for the n atoms of send list
// `iswap` and, unless sdata->overlap_comm is set, download n*3 X_FLOAT
// values from the shared device buffer into buf_send.
//   pbc / pbc_flag : periodic image counts; when pbc_flag != 0 they are
//                    turned into the shift (dx,dy,dz) handed to the kernel
// Kernel and download wall-clock times are added to sdata->cuda_timings.
// Nothing is launched when sdata->atom.nlocal <= 0.
// Returns 3*n.
int Cuda_CommCuda_PackComm(cuda_shared_data* sdata,int n,int iswap,void* buf_send,int* pbc,int pbc_flag)
{
  timespec t_a, t_b;

  // bring device symbols up to date
  if (sdata->atom.update_nmax) Cuda_CommCuda_UpdateNmax(sdata);
  if (sdata->atom.update_nlocal)
    cudaMemcpyToSymbol(MY_CONST(nlocal), &sdata->atom.nlocal, sizeof(int));

  const int bytes = n*3*sizeof(X_FLOAT);
  if (sdata->buffer_new || (bytes > sdata->buffersize))
    Cuda_CommCuda_UpdateBuffer(sdata, n);

  // shift passed to the kernel; stays zero unless pbc_flag is set
  X_FLOAT dx = 0.0;
  X_FLOAT dy = 0.0;
  X_FLOAT dz = 0.0;
  if (pbc_flag != 0) {
    if (sdata->domain.triclinic == 0) {
      dx = pbc[0]*sdata->domain.prd[0];
      dy = pbc[1]*sdata->domain.prd[1];
      dz = pbc[2]*sdata->domain.prd[2];
    } else {
      dx = pbc[0]*sdata->domain.prd[0] + pbc[5]*sdata->domain.xy + pbc[4]*sdata->domain.xz;
      dy = pbc[1]*sdata->domain.prd[1] + pbc[3]*sdata->domain.yz;
      dz = pbc[2]*sdata->domain.prd[2];
    }
  }

  int3 layout = getgrid(n);
  dim3 threads(layout.z, 1, 1);
  dim3 grid(layout.x, layout.y, 1);

  if (sdata->atom.nlocal <= 0) return 3*n;

  cudaMemset(sdata->flag, 0, sizeof(int));

  // --- kernel -------------------------------------------------------------
  clock_gettime(CLOCK_REALTIME, &t_a);
  void* buf = sdata->overlap_comm ? sdata->comm.buf_send_dev[iswap] : sdata->buffer;
  Cuda_CommCuda_PackComm_Kernel<<<grid, threads, 0>>>((int*) sdata->comm.sendlist.dev_data, n,
                                                      sdata->comm.maxlistlength, iswap, dx, dy, dz, buf);
  cudaThreadSynchronize();
  clock_gettime(CLOCK_REALTIME, &t_b);
  sdata->cuda_timings.comm_forward_kernel_pack +=
    t_b.tv_sec-t_a.tv_sec+1.0*(t_b.tv_nsec-t_a.tv_nsec)/1000000000;
  CUT_CHECK_ERROR("Cuda_CommCuda_PackComm: Kernel execution failed");

  // --- download (skipped when communication is overlapped) ------------------
  if (!sdata->overlap_comm)
    cudaMemcpy(buf_send, sdata->buffer, n*3*sizeof(X_FLOAT), cudaMemcpyDeviceToHost);
  clock_gettime(CLOCK_REALTIME, &t_a);
  sdata->cuda_timings.comm_forward_download +=
    t_a.tv_sec-t_b.tv_sec+1.0*(t_a.tv_nsec-t_b.tv_nsec)/1000000000;

  // report a non-zero device flag
  int aflag;
  cudaMemcpy(&aflag, sdata->flag, sizeof(int), cudaMemcpyDeviceToHost);
  if (aflag != 0) printf("aflag PackComm: %i\n", aflag);
  CUT_CHECK_ERROR("Cuda_CommCuda_PackComm: Kernel execution failed");

  return 3*n;
}
// Forward communication with velocities: run the pack kernel for the n atoms
// of send list `iswap` and, unless sdata->overlap_comm is set, download
// n*6 X_FLOAT values from the shared device buffer into buf_send.
//   pbc / pbc_flag : periodic image counts; when pbc_flag != 0 they are
//                    turned into the shift (dx,dy,dz) handed to the kernel
// Kernel and download wall-clock times are added to sdata->cuda_timings.
// Nothing is launched when sdata->atom.nlocal <= 0.
// Returns 6*n.
// NOTE(review): this launches the same kernel as Cuda_CommCuda_PackComm;
// confirm that kernel also writes the velocity part of the buffer.
int Cuda_CommCuda_PackCommVel(cuda_shared_data* sdata,int n,int iswap,void* buf_send,int* pbc,int pbc_flag)
{
  timespec time1,time2;

  if(sdata->atom.update_nmax)
    Cuda_CommCuda_UpdateNmax(sdata);
  if(sdata->atom.update_nlocal)
    cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );

  // Cuda_CommCuda_UpdateBuffer sizes the buffer for 3 values per element, so
  // 2*n elements are requested to make room for the n*6 values copied below
  // (requesting only n left the download reading past the allocation).
  int size=n*6*sizeof(X_FLOAT);
  if(sdata->buffer_new or (size>sdata->buffersize))
    Cuda_CommCuda_UpdateBuffer(sdata,2*n);

  // shift passed to the kernel; stays zero unless pbc_flag is set
  X_FLOAT dx=0.0;
  X_FLOAT dy=0.0;
  X_FLOAT dz=0.0;
  if (pbc_flag != 0) {
    if (sdata->domain.triclinic == 0) {
      dx = pbc[0]*sdata->domain.prd[0];
      dy = pbc[1]*sdata->domain.prd[1];
      dz = pbc[2]*sdata->domain.prd[2];
    } else {
      dx = pbc[0]*sdata->domain.prd[0] + pbc[5]*sdata->domain.xy + pbc[4]*sdata->domain.xz;
      dy = pbc[1]*sdata->domain.prd[1] + pbc[3]*sdata->domain.yz;
      dz = pbc[2]*sdata->domain.prd[2];
    }
  }

  int3 layout=getgrid(n);
  dim3 threads(layout.z, 1, 1);
  dim3 grid(layout.x, layout.y, 1);

  if(sdata->atom.nlocal>0)
  {
    cudaMemset( sdata->flag,0,sizeof(int));

    // kernel, timed from launch to completion
    clock_gettime(CLOCK_REALTIME,&time1);
    void* buf=sdata->overlap_comm?sdata->comm.buf_send_dev[iswap]:sdata->buffer;
    Cuda_CommCuda_PackComm_Kernel<<<grid, threads,0>>>((int*) sdata->comm.sendlist.dev_data,n
        ,sdata->comm.maxlistlength,iswap,dx,dy,dz,buf);
    cudaThreadSynchronize();
    clock_gettime(CLOCK_REALTIME,&time2);
    sdata->cuda_timings.comm_forward_kernel_pack+=
      time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
    CUT_CHECK_ERROR("Cuda_CommCuda_PackCommVel: Kernel execution failed");

    // download (skipped when communication is overlapped)
    if(not sdata->overlap_comm)
      cudaMemcpy(buf_send, sdata->buffer, n*6*sizeof(X_FLOAT), cudaMemcpyDeviceToHost);
    clock_gettime(CLOCK_REALTIME,&time1);
    sdata->cuda_timings.comm_forward_download+=
      time1.tv_sec-time2.tv_sec+1.0*(time1.tv_nsec-time2.tv_nsec)/1000000000;

    // report a non-zero device flag
    int aflag;
    cudaMemcpy(&aflag, sdata->flag, sizeof(int), cudaMemcpyDeviceToHost);
    if(aflag!=0) printf("aflag PackCommVel: %i\n",aflag);
    CUT_CHECK_ERROR("Cuda_CommCuda_PackCommVel: Kernel execution failed");
  }
  return 6*n;
}
// Forward communication to self: run the self-pack kernel for the n atoms
// of send list `iswap`, with `first` handed to the kernel as the target
// offset argument. No host transfer takes place.
//   pbc / pbc_flag : periodic image counts; when pbc_flag != 0 they are
//                    turned into the shift (dx,dy,dz) handed to the kernel
// The kernel wall-clock time is added to
// sdata->cuda_timings.comm_forward_kernel_self.
// Nothing is launched when sdata->atom.nlocal <= 0.
// Returns 3*n.
int Cuda_CommCuda_PackComm_Self(cuda_shared_data* sdata,int n,int iswap,int first,int* pbc,int pbc_flag)
{
  MYDBG(printf(" # CUDA: CommCuda_PackComm_Self\n");)
  timespec time1,time2;

  if(sdata->atom.update_nmax)
    Cuda_CommCuda_UpdateNmax(sdata);
  if(sdata->atom.update_nlocal)
    cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );

  int size=n*3*sizeof(X_FLOAT);
  if(sdata->buffer_new or (size>sdata->buffersize))
    Cuda_CommCuda_UpdateBuffer(sdata,n);

  // (a function-local static call counter that was never read was removed)

  // shift passed to the kernel; stays zero unless pbc_flag is set
  X_FLOAT dx=0.0;
  X_FLOAT dy=0.0;
  X_FLOAT dz=0.0;
  if (pbc_flag != 0) {
    if (sdata->domain.triclinic == 0) {
      dx = pbc[0]*sdata->domain.prd[0];
      dy = pbc[1]*sdata->domain.prd[1];
      dz = pbc[2]*sdata->domain.prd[2];
    } else {
      dx = pbc[0]*sdata->domain.prd[0] + pbc[5]*sdata->domain.xy + pbc[4]*sdata->domain.xz;
      dy = pbc[1]*sdata->domain.prd[1] + pbc[3]*sdata->domain.yz;
      dz = pbc[2]*sdata->domain.prd[2];
    }
  }

  int3 layout=getgrid(n);
  dim3 threads(layout.z, 1, 1);
  dim3 grid(layout.x, layout.y, 1);

  if(sdata->atom.nlocal>0)
  {
    clock_gettime(CLOCK_REALTIME,&time1);
    Cuda_CommCuda_PackComm_Self_Kernel<<<grid, threads,0>>>((int*) sdata->comm.sendlist.dev_data,n,sdata->comm.maxlistlength,iswap,dx,dy,dz,first);
    cudaThreadSynchronize();
    clock_gettime(CLOCK_REALTIME,&time2);
    sdata->cuda_timings.comm_forward_kernel_self+=
      time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
    CUT_CHECK_ERROR("Cuda_CommCuda_PackComm_Self: Kernel execution failed");
  }
  return 3*n;
}
// Forward communication to self, velocity variant: run the self-pack kernel
// for the n atoms of send list `iswap`, with `first` handed to the kernel as
// the target offset argument. No host transfer takes place.
//   pbc / pbc_flag : periodic image counts; when pbc_flag != 0 they are
//                    turned into the shift (dx,dy,dz) handed to the kernel
// The kernel wall-clock time is added to
// sdata->cuda_timings.comm_forward_kernel_self.
// Nothing is launched when sdata->atom.nlocal <= 0.
// Returns 6*n.
// NOTE(review): this launches the same kernel as Cuda_CommCuda_PackComm_Self;
// confirm that kernel also transfers velocities.
int Cuda_CommCuda_PackCommVel_Self(cuda_shared_data* sdata,int n,int iswap,int first,int* pbc,int pbc_flag)
{
  MYDBG(printf(" # CUDA: CommCuda_PackCommVel_Self\n");)
  timespec time1,time2;

  if(sdata->atom.update_nmax)
    Cuda_CommCuda_UpdateNmax(sdata);
  if(sdata->atom.update_nlocal)
    cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );

  // Cuda_CommCuda_UpdateBuffer sizes the buffer for 3 values per element, so
  // 2*n elements are requested to match the n*6 values checked for here.
  int size=n*6*sizeof(X_FLOAT);
  if(sdata->buffer_new or (size>sdata->buffersize))
    Cuda_CommCuda_UpdateBuffer(sdata,2*n);

  // (a function-local static call counter that was never read was removed)

  // shift passed to the kernel; stays zero unless pbc_flag is set
  X_FLOAT dx=0.0;
  X_FLOAT dy=0.0;
  X_FLOAT dz=0.0;
  if (pbc_flag != 0) {
    if (sdata->domain.triclinic == 0) {
      dx = pbc[0]*sdata->domain.prd[0];
      dy = pbc[1]*sdata->domain.prd[1];
      dz = pbc[2]*sdata->domain.prd[2];
    } else {
      dx = pbc[0]*sdata->domain.prd[0] + pbc[5]*sdata->domain.xy + pbc[4]*sdata->domain.xz;
      dy = pbc[1]*sdata->domain.prd[1] + pbc[3]*sdata->domain.yz;
      dz = pbc[2]*sdata->domain.prd[2];
    }
  }

  int3 layout=getgrid(n);
  dim3 threads(layout.z, 1, 1);
  dim3 grid(layout.x, layout.y, 1);

  if(sdata->atom.nlocal>0)
  {
    clock_gettime(CLOCK_REALTIME,&time1);
    Cuda_CommCuda_PackComm_Self_Kernel<<<grid, threads,0>>>((int*) sdata->comm.sendlist.dev_data,n,sdata->comm.maxlistlength,iswap,dx,dy,dz,first);
    cudaThreadSynchronize();
    clock_gettime(CLOCK_REALTIME,&time2);
    sdata->cuda_timings.comm_forward_kernel_self+=
      time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
    CUT_CHECK_ERROR("Cuda_CommCuda_PackCommVel_Self: Kernel execution failed");
  }
  return 6*n;
}
// Forward communication, receive side: unpack n atoms starting at index
// `first`. Unless communication is overlapped for this swap
// (sdata->overlap_comm set and iswap >= 0), buf_recv is first uploaded into
// the shared device buffer; otherwise the kernel reads
// sdata->comm.buf_recv_dev[iswap] directly.
// Upload and kernel wall-clock times are added to sdata->cuda_timings.
// Nothing happens when sdata->atom.nlocal <= 0.
void Cuda_CommCuda_UnpackComm(cuda_shared_data* sdata,int n,int first,void* buf_recv,int iswap)
{
  timespec t_a, t_b;

  // bring device symbols up to date
  if (sdata->atom.update_nmax) Cuda_CommCuda_UpdateNmax(sdata);
  if (sdata->atom.update_nlocal)
    cudaMemcpyToSymbol(MY_CONST(nlocal), &sdata->atom.nlocal, sizeof(int));

  const int bytes = n*3*sizeof(X_FLOAT);
  if (sdata->buffer_new || (bytes > sdata->buffersize))
    Cuda_CommCuda_UpdateBuffer(sdata, n);

  int3 layout = getgrid(n);
  dim3 threads(layout.z, 1, 1);
  dim3 grid(layout.x, layout.y, 1);

  if (sdata->atom.nlocal <= 0) return;

  // --- upload (only when the data is not already on the device) -------------
  clock_gettime(CLOCK_REALTIME, &t_a);
  if (!sdata->overlap_comm || iswap < 0)
    cudaMemcpy(sdata->buffer, (void*) buf_recv, n*3*sizeof(X_FLOAT), cudaMemcpyHostToDevice);
  clock_gettime(CLOCK_REALTIME, &t_b);
  sdata->cuda_timings.comm_forward_upload +=
    t_b.tv_sec-t_a.tv_sec+1.0*(t_b.tv_nsec-t_a.tv_nsec)/1000000000;

  // --- kernel ---------------------------------------------------------------
  void* buf = (sdata->overlap_comm && iswap >= 0) ? sdata->comm.buf_recv_dev[iswap] : sdata->buffer;
  Cuda_CommCuda_UnpackComm_Kernel<<<grid, threads, 0>>>(n, first, buf);
  cudaThreadSynchronize();
  clock_gettime(CLOCK_REALTIME, &t_a);
  sdata->cuda_timings.comm_forward_kernel_unpack +=
    t_a.tv_sec-t_b.tv_sec+1.0*(t_a.tv_nsec-t_b.tv_nsec)/1000000000;
  CUT_CHECK_ERROR("Cuda_CommCuda_UnpackComm: Kernel execution failed");
}
// Forward communication with velocities, receive side: unpack n atoms
// starting at index `first`. Unless communication is overlapped for this
// swap (sdata->overlap_comm set and iswap >= 0), n*6 X_FLOAT values from
// buf_recv are first uploaded into the shared device buffer; otherwise the
// kernel reads sdata->comm.buf_recv_dev[iswap] directly.
// Upload and kernel wall-clock times are added to sdata->cuda_timings.
// Nothing happens when sdata->atom.nlocal <= 0.
// NOTE(review): this launches the same kernel as Cuda_CommCuda_UnpackComm;
// confirm that kernel also consumes the velocity part of the buffer.
void Cuda_CommCuda_UnpackCommVel(cuda_shared_data* sdata,int n,int first,void* buf_recv,int iswap)
{
  timespec time1,time2;

  if(sdata->atom.update_nmax)
    Cuda_CommCuda_UpdateNmax(sdata);
  if(sdata->atom.update_nlocal)
    cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );

  // Cuda_CommCuda_UpdateBuffer sizes the buffer for 3 values per element, so
  // 2*n elements are requested to make room for the n*6 values uploaded below
  // (requesting only n let the upload write past the allocation).
  int size=n*6*sizeof(X_FLOAT);
  if(sdata->buffer_new or (size>sdata->buffersize))
    Cuda_CommCuda_UpdateBuffer(sdata,2*n);

  int3 layout=getgrid(n);
  dim3 threads(layout.z, 1, 1);
  dim3 grid(layout.x, layout.y, 1);

  if(sdata->atom.nlocal>0)
  {
    // upload (only when the data is not already on the device)
    clock_gettime(CLOCK_REALTIME,&time1);
    if(not sdata->overlap_comm||iswap<0)
      cudaMemcpy(sdata->buffer,(void*)buf_recv, n*6*sizeof(X_FLOAT), cudaMemcpyHostToDevice);
    clock_gettime(CLOCK_REALTIME,&time2);
    sdata->cuda_timings.comm_forward_upload+=
      time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;

    // kernel
    void* buf=(sdata->overlap_comm&&iswap>=0)?sdata->comm.buf_recv_dev[iswap]:sdata->buffer;
    Cuda_CommCuda_UnpackComm_Kernel<<<grid, threads,0>>>(n,first,buf);
    cudaThreadSynchronize();
    clock_gettime(CLOCK_REALTIME,&time1);
    sdata->cuda_timings.comm_forward_kernel_unpack+=
      time1.tv_sec-time2.tv_sec+1.0*(time1.tv_nsec-time2.tv_nsec)/1000000000;
    CUT_CHECK_ERROR("Cuda_CommCuda_UnpackCommVel: Kernel execution failed");
  }
}
// Reverse communication, send side: download the force values of n atoms
// starting at index `first` into buf_send.
// Three blocks of n F_FLOAT values are copied; consecutive blocks start
// sdata->atom.nmax elements apart in the device force array and n elements
// apart in buf_send.
// Returns n*3.
int Cuda_CommCuda_PackReverse(cuda_shared_data* sdata,int n,int first,void* buf_send)
{
  // bring device symbols up to date
  if (sdata->atom.update_nmax) Cuda_CommCuda_UpdateNmax(sdata);
  if (sdata->atom.update_nlocal)
    cudaMemcpyToSymbol(MY_CONST(nlocal), &sdata->atom.nlocal, sizeof(int));

  const int bytes = n*3*sizeof(F_FLOAT);
  if (sdata->buffer_new || (bytes > sdata->buffersize))
    Cuda_CommCuda_UpdateBuffer(sdata, n);

  F_FLOAT* host_dst = (F_FLOAT*) buf_send;
  F_FLOAT* dev_src  = (F_FLOAT*) sdata->atom.f.dev_data;
  dev_src += first;

  // one copy per block; pointers advance only between copies
  for (int block = 0; block < 3; block++) {
    if (block > 0) {
      host_dst += n;
      dev_src  += sdata->atom.nmax;
    }
    cudaMemcpy(host_dst, dev_src, n*sizeof(F_FLOAT), cudaMemcpyDeviceToHost);
  }
  return n*3;
}
// Reverse communication, receive side: upload n*3 F_FLOAT values from
// buf_recv into the shared device buffer and run the unpack kernel for send
// list `iswap`.
// Nothing happens when sdata->atom.nlocal <= 0.
void Cuda_CommCuda_UnpackReverse(cuda_shared_data* sdata,int n,int iswap,void* buf_recv)
{
  if(sdata->atom.update_nmax)
    Cuda_CommCuda_UpdateNmax(sdata);
  if(sdata->atom.update_nlocal)
    cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );

  int size=n*3*sizeof(F_FLOAT);
  if(sdata->buffer_new or (size>sdata->buffersize))
  {
    // Cuda_CommCuda_UpdateBuffer allocates in units of 3*sizeof(X_FLOAT)
    // bytes. Request enough units to cover `size` bytes so the upload below
    // also fits when F_FLOAT is wider than X_FLOAT; when both types have the
    // same size this evaluates to n, exactly as before.
    const int unit=3*(int)sizeof(X_FLOAT);
    Cuda_CommCuda_UpdateBuffer(sdata,(size+unit-1)/unit);
  }

  int3 layout=getgrid(n);
  dim3 threads(layout.z, 1, 1);
  dim3 grid(layout.x, layout.y, 1);

  if(sdata->atom.nlocal>0)
  {
    cudaMemcpy(sdata->buffer,buf_recv, size, cudaMemcpyHostToDevice);
    Cuda_CommCuda_UnpackReverse_Kernel<<<grid, threads,0>>>((int*) sdata->comm.sendlist.dev_data,n,sdata->comm.maxlistlength,iswap);
    cudaThreadSynchronize();
    CUT_CHECK_ERROR("Cuda_CommCuda_UnpackReverse: Kernel execution failed");
  }
}
// Reverse communication to self: run the self-unpack kernel for the n atoms
// of send list `iswap`, with `first` handed to the kernel as the source
// offset argument. No host transfer takes place.
// Nothing is launched when sdata->atom.nlocal <= 0.
void Cuda_CommCuda_UnpackReverse_Self(cuda_shared_data* sdata,int n,int iswap,int first)
{
  if(sdata->atom.update_nmax)
    Cuda_CommCuda_UpdateNmax(sdata);
  if(sdata->atom.update_nlocal)
    cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );

  int size=n*3*sizeof(X_FLOAT);
  if(sdata->buffer_new or (size>sdata->buffersize))
    Cuda_CommCuda_UpdateBuffer(sdata,n);

  int3 layout=getgrid(n);
  dim3 threads(layout.z, 1, 1);
  dim3 grid(layout.x, layout.y, 1);

  if(sdata->atom.nlocal>0)
  {
    Cuda_CommCuda_UnpackReverse_Self_Kernel<<<grid, threads,0>>>((int*) sdata->comm.sendlist.dev_data,n,sdata->comm.maxlistlength,iswap,first);
    cudaThreadSynchronize();
    // the message now names this function (it previously said PackReverse_Self)
    CUT_CHECK_ERROR("Cuda_CommCuda_UnpackReverse_Self: Kernel execution failed");
  }
}
// Build the send list for swap `iswap` on the device and return the number
// of atoms it contains.
//   style == 1 launches the "Single" kernel with the slablo/slabhi arrays,
//   any other value the "Multi" kernel with multilo/multihi.
// The first int of the shared device buffer is zeroed before the launch and
// read back afterwards as the result. The kernel wall-clock time is added to
// sdata->cuda_timings.comm_border_kernel_buildlist.
int Cuda_CommCuda_BuildSendlist(cuda_shared_data* sdata,int bordergroup,int ineed,int style,int atom_nfirst,int nfirst,int nlast,int dim,int iswap)
{
  MYDBG(printf(" # CUDA: CommCuda_BuildSendlist\n");)
  timespec t_start, t_stop;

  // device symbols are refreshed unconditionally here
  Cuda_CommCuda_UpdateNmax(sdata);
  cudaMemcpyToSymbol(MY_CONST(nlocal), &sdata->atom.nlocal, sizeof(int));
  if (sdata->buffer_new || (80 > sdata->buffersize))
    Cuda_CommCuda_UpdateBuffer(sdata, 10);

  // number of candidate atoms the launch has to cover
  int n;
  if (bordergroup && ineed < 2) {
    const int tail = nlast - sdata->atom.nlocal + 1;
    n = atom_nfirst;
    if (tail > n) n = tail;
  } else {
    n = nlast - nfirst + 1;
  }

  int3 layout = getgrid(n, 0, 512, true);
  dim3 threads(layout.z, 1, 1);
  dim3 grid(layout.x + 1, layout.y, 1);

  // reset the counter slot at the start of the shared buffer
  cudaMemset((int*) (sdata->buffer), 0, sizeof(int));

  clock_gettime(CLOCK_REALTIME, &t_start);
  if (style == 1) {
    Cuda_CommCuda_BuildSendlist_Single<<<grid, threads, (threads.x+1)*sizeof(int)>>>(
        bordergroup, ineed, atom_nfirst, nfirst, nlast, dim, iswap,
        (X_FLOAT*) sdata->comm.slablo.dev_data, (X_FLOAT*) sdata->comm.slabhi.dev_data,
        (int*) sdata->comm.sendlist.dev_data, sdata->comm.maxlistlength);
  } else {
    Cuda_CommCuda_BuildSendlist_Multi<<<grid, threads, (threads.x+1)*sizeof(int)>>>(
        bordergroup, ineed, atom_nfirst, nfirst, nlast, dim, iswap,
        (X_FLOAT*) sdata->comm.multilo.dev_data, (X_FLOAT*) sdata->comm.multihi.dev_data,
        (int*) sdata->comm.sendlist.dev_data, sdata->comm.maxlistlength);
  }
  cudaThreadSynchronize();
  clock_gettime(CLOCK_REALTIME, &t_stop);
  sdata->cuda_timings.comm_border_kernel_buildlist +=
    t_stop.tv_sec-t_start.tv_sec+1.0*(t_stop.tv_nsec-t_start.tv_nsec)/1000000000;
  CUT_CHECK_ERROR("Cuda_CommCuda_BuildSendlist: Kernel execution failed");

  // fetch the resulting list length from the counter slot
  int nsend;
  cudaMemcpy(&nsend, sdata->buffer, sizeof(int), cudaMemcpyDeviceToHost);
  return nsend;
}

View File

@ -0,0 +1,61 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "mpi.h"
#include <cstring>
#include "compute_pe_cuda.h"
#include "atom.h"
#include "update.h"
#include "force.h"
#include "pair.h"
#include "bond.h"
#include "angle.h"
#include "dihedral.h"
#include "improper.h"
#include "kspace.h"
#include "modify.h"
#include "domain.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Construct the pe/cuda compute: all arguments are forwarded to ComputePE,
// afterwards the cudable flag is set to 1.
ComputePECuda::ComputePECuda(LAMMPS *lmp, int narg, char **arg)
  : ComputePE(lmp, narg, arg)
{
  this->cudable = 1;
}

View File

@ -0,0 +1,59 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef COMPUTE_CLASS
ComputeStyle(pe/cuda,ComputePECuda)
#else
#ifndef LMP_COMPUTE_PE_CUDA_H
#define LMP_COMPUTE_PE_CUDA_H
#include "compute_pe.h"
namespace LAMMPS_NS {
// Compute style "pe/cuda": derives from ComputePE and adds only its own
// constructor and an empty destructor.
class ComputePECuda : public ComputePE {
public:
ComputePECuda(class LAMMPS *, int, char **);
~ComputePECuda() {}
};
}
#endif
#endif

View File

@ -0,0 +1,97 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "mpi.h"
#include <cstring>
#include <cstdlib>
#include "compute_pressure_cuda.h"
#include "atom.h"
#include "update.h"
#include "domain.h"
#include "modify.h"
#include "fix.h"
#include "force.h"
#include "pair.h"
#include "bond.h"
#include "angle.h"
#include "dihedral.h"
#include "improper.h"
#include "kspace.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
// Local anonymous enumeration; none of its enumerators are referenced in the
// visible part of this file.
// NOTE(review): presumably mirrors the invocation flags of the Compute base
// class -- confirm; also note the "DUMMMY3" spelling.
enum{DUMMY0,INVOKED_SCALAR,INVOKED_VECTOR,DUMMMY3,INVOKED_PERATOM};
/* ---------------------------------------------------------------------- */
// Construct the pressure/cuda compute.
// All arguments are forwarded to ComputePressure. The Cuda object is taken
// from lmp->cuda and must not be NULL. arg[3] is the ID of the temperature
// compute; if that compute is not cudable a warning is issued and this
// compute is marked as not cudable as well.
ComputePressureCuda::ComputePressureCuda(LAMMPS *lmp, int narg, char **arg) :
  ComputePressure(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if(cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  cudable = 1;

  // store temperature ID used by pressure computation
  // insure it is valid for temperature computation

  int n = strlen(arg[3]) + 1;
  char* id_temp = new char[n];
  strcpy(id_temp,arg[3]);

  int icompute = modify->find_compute(id_temp);
  // allocated with new[], so it has to be released with delete[]
  delete [] id_temp;

  // never index modify->compute with a failed lookup result
  if (icompute < 0)
    error->all("Could not find compute pressure/cuda temperature ID");

  if (modify->compute[icompute]->cudable == 0)
  {
    error->warning("Compute pressure/cuda temperature ID is not cudable! Try a temp/cuda style.");
    cudable = 0;
  }
}
// Compute the scalar pressure.
// If the temperature compute is not cudable and CUDA setup has finished, all
// data is first downloaded via cuda->downloadAll(). The value computed by
// ComputePressure::compute_scalar() is returned to the caller (the function
// previously ended without a return statement).
double ComputePressureCuda::compute_scalar()
{
  if(not temperature->cudable && cuda->finished_setup) cuda->downloadAll();
  return ComputePressure::compute_scalar();
}
// Compute the pressure vector.
// If the temperature compute is not cudable and CUDA setup has finished, all
// data is first downloaded via cuda->downloadAll(); the actual computation is
// done by ComputePressure::compute_vector().
void ComputePressureCuda::compute_vector()
{
  const bool download_first = !temperature->cudable && cuda->finished_setup;
  if (download_first) {
    cuda->downloadAll();
  }
  ComputePressure::compute_vector();
}

View File

@ -0,0 +1,63 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef COMPUTE_CLASS
ComputeStyle(pressure/cuda,ComputePressureCuda)
#else
#ifndef LMP_COMPUTE_PRESSURE_CUDA_H
#define LMP_COMPUTE_PRESSURE_CUDA_H
#include "compute_pressure.h"
namespace LAMMPS_NS {
// Pressure compute for the USER-CUDA package.
// Derives from ComputePressure and overrides the two evaluation entry
// points; the implementation file calls cuda->downloadAll() before the
// base-class evaluation when the temperature compute is not cudable.
class ComputePressureCuda : public ComputePressure {
public:
// arguments are forwarded unchanged to the ComputePressure constructor
ComputePressureCuda(class LAMMPS *, int, char **);
// nothing to release: the Cuda pointer below is not owned by this class
~ComputePressureCuda() {}
// scalar pressure
double compute_scalar();
// pressure tensor
void compute_vector();
private:
// handle to the Cuda object taken from lmp->cuda in the constructor
class Cuda *cuda;
};
}
#endif
#endif

View File

@ -0,0 +1,212 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "mpi.h"
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include "compute_temp_cuda.h"
#include "compute_temp_cuda_cu.h"
#include "atom.h"
#include "update.h"
#include "force.h"
#include "domain.h"
#include "modify.h"
#include "fix.h"
#include "group.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   compute ID group temp/cuda  (exactly three arguments)
   requires an active Cuda object; device-side result buffers are created
   later, on first use inside compute_scalar()/compute_vector()
------------------------------------------------------------------------- */
ComputeTempCuda::ComputeTempCuda(LAMMPS *lmp, int narg, char **arg) :
  Compute(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (!cuda)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  if (narg != 3)
    error->all("Illegal compute temp/cuda command");

  // this compute provides a scalar and a 6-component vector

  scalar_flag = 1;
  vector_flag = 1;
  size_vector = 6;
  extscalar = 0;
  extvector = 1;
  tempflag = 1;

  vector = new double[6];

  // no device buffers yet

  cu_t_scalar = 0;
  cu_t_vector = 0;

  cudable = true;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   release the host result vector and the two device-buffer wrappers
   (deleting a null wrapper pointer is a no-op)
------------------------------------------------------------------------- */
ComputeTempCuda::~ComputeTempCuda()
{
  delete [] vector;

  delete cu_t_scalar;
  delete cu_t_vector;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   sum the dof() contribution of every fix for this group,
   then refresh dof and tfactor
------------------------------------------------------------------------- */
void ComputeTempCuda::init()
{
  fix_dof = 0;

  const int nfix = modify->nfix;
  for (int ifix = 0; ifix < nfix; ++ifix) {
    fix_dof += modify->fix[ifix]->dof(igroup);
  }

  dof_compute();
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   dof = dimension * (atoms in group) - (extra_dof + fix_dof)
   tfactor converts a summed m*v^2 into a temperature; it is 0 when no
   degrees of freedom remain
------------------------------------------------------------------------- */
void ComputeTempCuda::dof_compute()
{
  double group_atoms = group->count(igroup);

  dof = domain->dimension * group_atoms;
  dof -= extra_dof + fix_dof;

  tfactor = (dof > 0.0) ? force->mvv2e / (dof * force->boltz) : 0.0;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Temperature of the group.

   The per-rank sum of m*v^2 is produced either by the CUDA wrapper
   (when cuda->begin_setup is set) or by a host loop, is summed over all
   ranks and scaled by tfactor.  A result above 1e15 prints the atoms with
   |v|^2 > 1e5 and raises a fatal error.
------------------------------------------------------------------------- */
double ComputeTempCuda::compute_scalar()
{
  // both code paths stamp the evaluation with the current timestep
  invoked_scalar = update->ntimestep;

  if (!cuda->begin_setup) {
    // host path

    double **vel = atom->v;
    double *type_mass = atom->mass;
    double *atom_mass = atom->rmass;
    int *type = atom->type;
    int *mask = atom->mask;
    const int nlocal = atom->nlocal;

    double mv2 = 0.0;

    if (atom_mass) {
      // per-atom masses
      for (int i = 0; i < nlocal; i++) {
        if (!(mask[i] & groupbit)) continue;
        const double *vi = vel[i];
        mv2 += (vi[0]*vi[0] + vi[1]*vi[1] + vi[2]*vi[2]) * atom_mass[i];
      }
    } else {
      // per-type masses
      for (int i = 0; i < nlocal; i++) {
        if (!(mask[i] & groupbit)) continue;
        const double *vi = vel[i];
        mv2 += (vi[0]*vi[0] + vi[1]*vi[1] + vi[2]*vi[2]) *
          type_mass[type[i]];
      }
    }

    t_scalar = mv2;
  } else {
    // device path: create the buffer wrappers on first use

    if (!cu_t_vector) cu_t_vector = new cCudaData<double, ENERGY_FLOAT, x> (t_vector,6);
    if (!cu_t_scalar) cu_t_scalar = new cCudaData<double, ENERGY_FLOAT, x> (&t_scalar,1);

    Cuda_ComputeTempCuda_Scalar(&cuda->shared_data,groupbit,(ENERGY_FLOAT*) cu_t_scalar->dev_data());
    cu_t_scalar->download();
  }

  MPI_Allreduce(&t_scalar,&scalar,1,MPI_DOUBLE,MPI_SUM,world);
  if (dynamic) dof_compute();
  scalar *= tfactor;

  // normal exit; written as a negated ">" so that a NaN is returned
  // exactly like in the comparison it replaces
  if (!(scalar > 1e15)) return scalar;

  // runaway temperature: refresh the host copies and list fast atoms

  cuda->cu_v->download();
  cuda->cu_x->download();
  cuda->cu_type->download();

  double **vel = atom->v;
  double **pos = atom->x;

  printf("Out of v-range atoms: \n");
  for (int i = 0; i < atom->nlocal; i++) {
    const double speed2 = vel[i][0]*vel[i][0] + vel[i][1]*vel[i][1] + vel[i][2]*vel[i][2];
    if (speed2 > 1e5)
      printf("%i %i // %lf %lf %lf // %lf %lf %lf\n",atom->tag[i],atom->type[i],pos[i][0], pos[i][1], pos[i][2],vel[i][0], vel[i][1], vel[i][2]);
  }

  error->all("Temperature out of range. Simulations will be abortet.\n");
  return scalar;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Six-component m*v*v tensor (xx, yy, zz, xy, xz, yz) of the group,
   summed over all ranks and multiplied by force->mvv2e.
   The per-rank sums come from the CUDA wrapper when cuda->begin_setup is
   set, otherwise from a host loop.
------------------------------------------------------------------------- */
void ComputeTempCuda::compute_vector()
{
  invoked_vector = update->ntimestep;

  if (!cuda->begin_setup) {
    // host path

    double **vel = atom->v;
    double *type_mass = atom->mass;
    double *atom_mass = atom->rmass;
    int *type = atom->type;
    int *mask = atom->mask;
    const int nlocal = atom->nlocal;

    double sum[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};

    for (int i = 0; i < nlocal; i++) {
      if (!(mask[i] & groupbit)) continue;

      const double m = atom_mass ? atom_mass[i] : type_mass[type[i]];
      const double *vi = vel[i];

      sum[0] += m * vi[0]*vi[0];
      sum[1] += m * vi[1]*vi[1];
      sum[2] += m * vi[2]*vi[2];
      sum[3] += m * vi[0]*vi[1];
      sum[4] += m * vi[0]*vi[2];
      sum[5] += m * vi[1]*vi[2];
    }

    for (int k = 0; k < 6; k++) t_vector[k] = sum[k];
  } else {
    // device path: create the buffer wrappers on first use

    if (!cu_t_vector) cu_t_vector = new cCudaData<double, ENERGY_FLOAT, x> (t_vector,6);
    if (!cu_t_scalar) cu_t_scalar = new cCudaData<double, ENERGY_FLOAT, x> (&t_scalar,1);

    Cuda_ComputeTempCuda_Vector(&cuda->shared_data,groupbit,(ENERGY_FLOAT*) cu_t_vector->dev_data());
    cu_t_vector->download();
  }

  MPI_Allreduce(t_vector,vector,6,MPI_DOUBLE,MPI_SUM,world);

  for (int k = 0; k < 6; k++) vector[k] *= force->mvv2e;
}

View File

@ -0,0 +1,75 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef COMPUTE_CLASS
ComputeStyle(temp/cuda,ComputeTempCuda)
#else
#ifndef LMP_COMPUTE_TEMP_CUDA_H
#define LMP_COMPUTE_TEMP_CUDA_H
#include "compute.h"
#include "cuda_data.h"
namespace LAMMPS_NS {
// Temperature compute for the USER-CUDA package.
// The implementation evaluates the per-rank sums through a CUDA wrapper
// when cuda->begin_setup is set and through a host loop otherwise.
class ComputeTempCuda : public Compute {
public:
ComputeTempCuda(class LAMMPS *, int, char **);
~ComputeTempCuda();
// accumulates fix_dof from all fixes and calls dof_compute()
void init();
// temperature of the group
double compute_scalar();
// 6-component m*v*v tensor, stored in the inherited "vector" array
void compute_vector();
private:
// handle to the Cuda object taken from lmp->cuda in the constructor
class Cuda *cuda;
// sum of Fix::dof() over all fixes for this group
int fix_dof;
// factor that turns the reduced m*v^2 sum into a temperature
double tfactor;
// recompute dof and tfactor
void dof_compute();
// per-rank (pre-MPI_Allreduce) results; also the host side of the
// cCudaData wrappers below
double t_vector[6];
double t_scalar;
// device-buffer wrappers, created on first use in compute_scalar()/compute_vector()
cCudaData<double , ENERGY_FLOAT , x>* cu_t_scalar;
cCudaData<double , ENERGY_FLOAT , x>* cu_t_vector;
};
}
#endif
#endif

View File

@ -0,0 +1,357 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "mpi.h"
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include "compute_temp_partial_cuda.h"
#include "compute_temp_partial_cuda_cu.h"
#include "atom.h"
#include "update.h"
#include "force.h"
#include "domain.h"
#include "modify.h"
#include "fix.h"
#include "group.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   compute ID group temp/partial/cuda xflag yflag zflag  (six arguments)
   the three flags select which velocity components enter the temperature
------------------------------------------------------------------------- */
ComputeTempPartialCuda::ComputeTempPartialCuda(LAMMPS *lmp, int narg, char **arg) :
  Compute(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (!cuda)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  if (narg != 6)
    error->all("Illegal compute temp/partial command");

  // this compute provides a scalar and a 6-component vector
  // and removes a velocity bias

  scalar_flag = 1;
  vector_flag = 1;
  size_vector = 6;
  extscalar = 0;
  extvector = 1;
  tempflag = 1;
  tempbias = 1;

  // component selection

  xflag = atoi(arg[3]);
  yflag = atoi(arg[4]);
  zflag = atoi(arg[5]);

  if (zflag && domain->dimension == 2)
    error->all("Compute temp/partial cannot use vz for 2d systemx");

  // bias storage is allocated on demand in remove_bias_all()

  maxbias = 0;
  vbiasall = NULL;
  cu_vbiasall = NULL;

  vector = new double[6];

  // no device result buffers yet

  cu_t_scalar = 0;
  cu_t_vector = 0;

  cudable = true;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   release the bias array, the host result vector and the three
   device-buffer wrappers (deleting a null wrapper pointer is a no-op)
------------------------------------------------------------------------- */
ComputeTempPartialCuda::~ComputeTempPartialCuda()
{
  memory->destroy(vbiasall);
  delete [] vector;

  delete cu_t_scalar;
  delete cu_t_vector;
  delete cu_vbiasall;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   sum the dof() contribution of every fix for this group,
   then refresh dof and tfactor
------------------------------------------------------------------------- */
void ComputeTempPartialCuda::init()
{
  fix_dof = 0;

  const int nfix = modify->nfix;
  for (int ifix = 0; ifix < nfix; ++ifix) {
    fix_dof += modify->fix[ifix]->dof(igroup);
  }

  dof_compute();
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   dof = nper * (atoms in group) - (nper/dimension * fix_dof + extra_dof)
   where nper is the number of selected velocity components;
   tfactor is 0 when no degrees of freedom remain
------------------------------------------------------------------------- */
void ComputeTempPartialCuda::dof_compute()
{
  double group_atoms = group->count(igroup);
  int ncomponents = xflag+yflag+zflag;

  dof = ncomponents * group_atoms;
  dof -= (1.0*ncomponents/domain->dimension)*fix_dof + extra_dof;

  tfactor = (dof > 0) ? force->mvv2e / (dof * force->boltz) : 0.0;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   number of degrees of freedom not used for an atom:
   dimension minus the number of selected components
   (the atom index is not consulted)
------------------------------------------------------------------------- */
int ComputeTempPartialCuda::dof_remove(int i)
{
  const int selected = xflag + yflag + zflag;
  const int removed = domain->dimension - selected;
  return removed;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Temperature of the group from the selected velocity components.

   The per-rank sum of m*v^2 is produced either by the CUDA wrapper
   (when cuda->begin_setup is set) or by a host loop, is summed over all
   ranks and scaled by tfactor.  A result above 1e15 prints the atoms with
   |v|^2 > 1e5 (all three components) and raises a fatal error.
------------------------------------------------------------------------- */
double ComputeTempPartialCuda::compute_scalar()
{
  // both code paths stamp the evaluation with the current timestep
  invoked_scalar = update->ntimestep;

  if (!cuda->begin_setup) {
    // host path

    double **vel = atom->v;
    double *type_mass = atom->mass;
    double *atom_mass = atom->rmass;
    int *type = atom->type;
    int *mask = atom->mask;
    const int nlocal = atom->nlocal;

    double mv2 = 0.0;

    if (atom_mass) {
      // per-atom masses
      for (int i = 0; i < nlocal; i++) {
        if (!(mask[i] & groupbit)) continue;
        const double *vi = vel[i];
        mv2 += (xflag*vi[0]*vi[0] + yflag*vi[1]*vi[1] + zflag*vi[2]*vi[2]) * atom_mass[i];
      }
    } else {
      // per-type masses
      for (int i = 0; i < nlocal; i++) {
        if (!(mask[i] & groupbit)) continue;
        const double *vi = vel[i];
        mv2 += (xflag*vi[0]*vi[0] + yflag*vi[1]*vi[1] + zflag*vi[2]*vi[2]) *
          type_mass[type[i]];
      }
    }

    t_scalar = mv2;
  } else {
    // device path: create the buffer wrappers on first use

    if (!cu_t_vector) cu_t_vector = new cCudaData<double, ENERGY_FLOAT, x> (t_vector,6);
    if (!cu_t_scalar) cu_t_scalar = new cCudaData<double, ENERGY_FLOAT, x> (&t_scalar,1);

    Cuda_ComputeTempPartialCuda_Scalar(&cuda->shared_data,groupbit,(ENERGY_FLOAT*) cu_t_scalar->dev_data(),xflag,yflag,zflag);
    cu_t_scalar->download();
  }

  MPI_Allreduce(&t_scalar,&scalar,1,MPI_DOUBLE,MPI_SUM,world);
  if (dynamic) dof_compute();
  scalar *= tfactor;

  // normal exit; written as a negated ">" so that a NaN is returned
  // exactly like in the comparison it replaces
  if (!(scalar > 1e15)) return scalar;

  // runaway temperature: refresh the host copies and list fast atoms

  cuda->cu_v->download();
  cuda->cu_x->download();
  cuda->cu_type->download();

  double **vel = atom->v;
  double **pos = atom->x;

  printf("Out of v-range atoms: \n");
  for (int i = 0; i < atom->nlocal; i++) {
    const double speed2 = vel[i][0]*vel[i][0] + vel[i][1]*vel[i][1] + vel[i][2]*vel[i][2];
    if (speed2 > 1e5)
      printf("%i %i // %lf %lf %lf // %lf %lf %lf\n",atom->tag[i],atom->type[i],pos[i][0], pos[i][1], pos[i][2],vel[i][0], vel[i][1], vel[i][2]);
  }

  error->all("Temperature out of range. Simulations will be abortet.\n");
  return scalar;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Six-component m*v*v tensor (xx, yy, zz, xy, xz, yz) of the group, each
   term weighted by the flags of the components it involves, summed over
   all ranks and multiplied by force->mvv2e.
   The per-rank sums come from the CUDA wrapper when cuda->begin_setup is
   set, otherwise from a host loop.
------------------------------------------------------------------------- */
void ComputeTempPartialCuda::compute_vector()
{
  invoked_vector = update->ntimestep;

  if (!cuda->begin_setup) {
    // host path

    double **vel = atom->v;
    double *type_mass = atom->mass;
    double *atom_mass = atom->rmass;
    int *type = atom->type;
    int *mask = atom->mask;
    const int nlocal = atom->nlocal;

    double sum[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};

    for (int i = 0; i < nlocal; i++) {
      if (!(mask[i] & groupbit)) continue;

      const double m = atom_mass ? atom_mass[i] : type_mass[type[i]];
      const double *vi = vel[i];

      sum[0] += m * xflag*vi[0]*vi[0];
      sum[1] += m * yflag*vi[1]*vi[1];
      sum[2] += m * zflag*vi[2]*vi[2];
      sum[3] += m * xflag*yflag*vi[0]*vi[1];
      sum[4] += m * xflag*zflag*vi[0]*vi[2];
      sum[5] += m * yflag*zflag*vi[1]*vi[2];
    }

    for (int k = 0; k < 6; k++) t_vector[k] = sum[k];
  } else {
    // device path: create the buffer wrappers on first use

    if (!cu_t_vector) cu_t_vector = new cCudaData<double, ENERGY_FLOAT, x> (t_vector,6);
    if (!cu_t_scalar) cu_t_scalar = new cCudaData<double, ENERGY_FLOAT, x> (&t_scalar,1);

    Cuda_ComputeTempPartialCuda_Vector(&cuda->shared_data,groupbit,(ENERGY_FLOAT*) cu_t_vector->dev_data(),xflag,yflag,zflag);
    cu_t_vector->download();
  }

  MPI_Allreduce(t_vector,vector,6,MPI_DOUBLE,MPI_SUM,world);

  for (int k = 0; k < 6; k++) vector[k] *= force->mvv2e;
}
/* ----------------------------------------------------------------------
   strip the velocity bias from a single atom, leaving its thermal part:
   every component whose flag is 0 is saved in vbias[] and zeroed in v
   (the atom index is not consulted)
------------------------------------------------------------------------- */
void ComputeTempPartialCuda::remove_bias(int i, double *v)
{
  const int selected[3] = {xflag, yflag, zflag};

  for (int d = 0; d < 3; ++d) {
    if (selected[d]) continue;
    vbias[d] = v[d];
    v[d] = 0.0;
  }
}
/* ----------------------------------------------------------------------
remove velocity bias from all atoms to leave thermal velocity
------------------------------------------------------------------------- */
void ComputeTempPartialCuda::remove_bias_all()
{
double **v = atom->v;
int *mask = atom->mask;
int nlocal = atom->nlocal;
if (nlocal > maxbias) {
memory->destroy(vbiasall);
maxbias = atom->nmax;
memory->create(vbiasall,maxbias,3,"temp/partial:vbiasall");
delete cu_vbiasall;
cu_vbiasall = new cCudaData<double, V_FLOAT, yx> ((double*)vbiasall, atom->nmax, 3);
}
if(cuda->begin_setup)
{
Cuda_ComputeTempPartialCuda_RemoveBiasAll(&cuda->shared_data,groupbit,xflag,yflag,zflag,cu_vbiasall->dev_data());
}
else
{
if (!xflag) {
for (int i = 0; i < nlocal; i++)
if (mask[i] & groupbit) {
vbiasall[i][0] = v[i][0];
v[i][0] = 0.0;
}
}
if (!yflag) {
for (int i = 0; i < nlocal; i++)
if (mask[i] & groupbit) {
vbiasall[i][1] = v[i][1];
v[i][1] = 0.0;
}
}
if (!zflag) {
for (int i = 0; i < nlocal; i++)
if (mask[i] & groupbit) {
vbiasall[i][2] = v[i][2];
v[i][2] = 0.0;
}
}
}
}
/* ----------------------------------------------------------------------
   give a single atom back the bias taken away by remove_bias():
   vbias[] is added to every component whose flag is 0
   remove_bias() must have been called beforehand
   (the atom index is not consulted)
------------------------------------------------------------------------- */
void ComputeTempPartialCuda::restore_bias(int i, double *v)
{
  const int selected[3] = {xflag, yflag, zflag};

  for (int d = 0; d < 3; ++d) {
    if (!selected[d]) v[d] += vbias[d];
  }
}
/* ----------------------------------------------------------------------
   give all atoms of the group back the bias taken away by
   remove_bias_all(): vbiasall is added to every component whose flag is 0
   remove_bias_all() must have been called beforehand
   delegated to the CUDA wrapper when cuda->begin_setup is set
------------------------------------------------------------------------- */
void ComputeTempPartialCuda::restore_bias_all()
{
  if (cuda->begin_setup) {
    Cuda_ComputeTempPartialCuda_RestoreBiasAll(&cuda->shared_data,groupbit,xflag,yflag,zflag,cu_vbiasall->dev_data());
    return;
  }

  // host path: single sweep over the local atoms

  double **vel = atom->v;
  int *mask = atom->mask;
  const int nlocal = atom->nlocal;

  for (int i = 0; i < nlocal; i++) {
    if (!(mask[i] & groupbit)) continue;

    if (!xflag) vel[i][0] += vbiasall[i][0];
    if (!yflag) vel[i][1] += vbiasall[i][1];
    if (!zflag) vel[i][2] += vbiasall[i][2];
  }
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   bytes held by the host-side bias array
   vbiasall is created as maxbias x 3 doubles in remove_bias_all(), so all
   three columns are counted (previously only one column was reported)
------------------------------------------------------------------------- */
double ComputeTempPartialCuda::memory_usage()
{
  double bytes = 3.0 * maxbias * sizeof(double);
  return bytes;
}

View File

@ -0,0 +1,83 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef COMPUTE_CLASS
ComputeStyle(temp/partial/cuda,ComputeTempPartialCuda)
#else
#ifndef LMP_COMPUTE_TEMP_PARTIAL_CUDA_H
#define LMP_COMPUTE_TEMP_PARTIAL_CUDA_H
#include "compute.h"
#include "cuda_data.h"
namespace LAMMPS_NS {
// Partial-temperature compute for the USER-CUDA package: only the velocity
// components selected by xflag/yflag/zflag contribute.
// The implementation evaluates through CUDA wrappers when
// cuda->begin_setup is set and through host loops otherwise.
class ComputeTempPartialCuda : public Compute {
public:
ComputeTempPartialCuda(class LAMMPS *, int, char **);
~ComputeTempPartialCuda();
// accumulates fix_dof from all fixes and calls dof_compute()
void init();
// temperature of the group from the selected components
double compute_scalar();
// 6-component m*v*v tensor, stored in the inherited "vector" array
void compute_vector();
// returns dimension minus the number of selected components
int dof_remove(int);
// single-atom bias removal / restoration (uses vbias)
void remove_bias(int, double *);
// whole-group bias removal (uses vbiasall and cu_vbiasall)
void remove_bias_all();
void restore_bias(int, double *);
// whole-group bias restoration; requires a prior remove_bias_all()
void restore_bias_all();
// bytes attributed to the bias array
double memory_usage();
private:
// handle to the Cuda object taken from lmp->cuda in the constructor
class Cuda *cuda;
// component selection parsed with atoi() from arguments 3..5
int xflag,yflag,zflag;
// sum of Fix::dof() over all fixes for this group
int fix_dof;
// factor that turns the reduced m*v^2 sum into a temperature
double tfactor;
// recompute dof and tfactor
void dof_compute();
// per-rank (pre-MPI_Allreduce) results; also the host side of the
// cCudaData wrappers below
double t_vector[6];
double t_scalar;
// device-buffer wrappers, created on first use in compute_scalar()/compute_vector()
cCudaData<double , ENERGY_FLOAT , x>* cu_t_scalar;
cCudaData<double , ENERGY_FLOAT , x>* cu_t_vector;
// device-buffer wrapper for vbiasall, (re)created in remove_bias_all()
cCudaData<double, V_FLOAT, yx>* cu_vbiasall;
};
}
#endif
#endif

View File

@ -230,7 +230,7 @@ void Cuda::accelerator(int narg, char** arg)
{
if(++i==narg)
error->all("Invalid Options for 'accelerator' command. Expecting a string after 'suffix' option.");
strcpy(lmp->asuffix,arg[i]);
strcpy(lmp->suffix,arg[i]);
}
if(strcmp(arg[i],"overlap_comm")==0)
{

View File

@ -29,12 +29,16 @@
#include <algorithm>
#include "cuda.h"
#include "atom.h"
#include "error.h"
using namespace LAMMPS_NS;
CudaNeighList::CudaNeighList(LAMMPS *lmp, class NeighList* neigh_list) : Pointers(lmp)
{
cuda = lmp->cuda;
if(cuda == NULL)
error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
MYDBG(printf("# CUDA: CudaNeighList::cudaNeighList() ... start\n");)
this->neigh_list = neigh_list;
neigh_list->cuda_list=this;

View File

@ -54,6 +54,8 @@ enum{NO_REMAP,X_REMAP,V_REMAP}; // same as fix_deform.cpp
DomainCuda::DomainCuda(LAMMPS *lmp) : Domain(lmp)
{
  // a /cuda domain is only usable with an active Cuda object
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }
}
/* ---------------------------------------------------------------------- */

View File

@ -0,0 +1,608 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Jim Shepherd (GA Tech) added SGI SCSL support
------------------------------------------------------------------------- */
#include "mpi.h"
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include "fft3d_cuda.h"
#include "fft3d_cuda_cu.h"
#include "remap.h"
#include <ctime>
#include "cuda_wrapper_cu.h"
#ifdef FFT_CUFFT
#endif
// The whole conditional expression is parenthesised so that the macros can
// be embedded in larger expressions (e.g. 1 + MAX(a,b)) without the
// surrounding operators binding to the comparison.
// Note: each argument is evaluated twice, so avoid side effects in A and B.
#define MIN(A,B) (((A) < (B)) ? (A) : (B))
#define MAX(A,B) (((A) > (B)) ? (A) : (B))
/* ----------------------------------------------------------------------
Data layout for 3d FFTs:
data set of Nfast x Nmid x Nslow elements is owned by P procs
on input, each proc owns a subsection of the elements
on output, each proc will own a (possibly different) subsection
my subsection must not overlap with any other proc's subsection,
i.e. the union of all proc's input (or output) subsections must
exactly tile the global Nfast x Nmid x Nslow data set
when called from C, all subsection indices are
C-style from 0 to N-1 where N = Nfast or Nmid or Nslow
when called from F77, all subsection indices are
F77-style from 1 to N where N = Nfast or Nmid or Nslow
a proc can own 0 elements on input or output
by specifying hi index < lo index
on both input and output, data is stored contiguously on a processor
with a fast-varying, mid-varying, and slow-varying index
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Perform 3d FFT
Arguments:
in starting address of input data on this proc
out starting address of where output data for this proc
will be placed (can be same as in)
flag 1 for forward FFT, -1 for inverse FFT
plan plan returned by previous call to fft_3d_create_plan
------------------------------------------------------------------------- */
void fft_3d_cuda(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
{
#ifdef FFT_CUFFT
  // Only active when built with FFT_CUFFT; otherwise this function is empty.
  //
  // Steps:
  //   1. with more than one proc and an initialising plan (plan->init),
  //      the input is remapped (if a pre_plan exists), copied to
  //      plan->cudata2 on the device and expanded into plan->cudata by
  //      initfftdata()
  //   2. the cuFFT transform is run from plan->cudata into plan->cudata2:
  //      flag == -1 selects CUFFT_FORWARD, any other value CUFFT_INVERSE
  //      NOTE(review): the header comment describes flag = 1 as forward,
  //      the code maps -1 to CUFFT_FORWARD -- confirm the intended convention
  //   3. FFTsyncthreads() is called before returning
  // Any CUDA or cuFFT failure prints a message and terminates the program.

  plan->iterate++;

  const int nprocs = plan->nprocs;

  // pre-remap to prepare for the FFT if needed
  // data = host buffer that is uploaded below

  FFT_DATA *data = in;
  if (nprocs > 1 && plan->pre_plan) {
    if (plan->init)
      remap_3d((double *) in, (double *) out, (double *) plan->scratch, plan->pre_plan);
    data = out;
  }

  if (plan->init && nprocs > 1) {
    // upload size depends on the FFT precision (sizeof tests are
    // compile-time constants, exactly one of them is taken)

    cudaError_t copyerr = cudaSuccess;
    if (sizeof(FFT_FLOAT) == sizeof(double))
      copyerr = cudaMemcpy((void*) (plan->cudata2), (void*) data, plan->cudatasize/2, cudaMemcpyHostToDevice);
    if (sizeof(FFT_FLOAT) == sizeof(float))
      copyerr = cudaMemcpy((void*) (plan->cudata2), (void*) data, plan->cudatasize, cudaMemcpyHostToDevice);

    // do not continue with a transform of data that never reached the device
    if (copyerr != cudaSuccess) {
      printf("ErrorCUDA: %s\n", cudaGetErrorString(copyerr));
      exit(EXIT_FAILURE);
    }

    initfftdata((double*)plan->cudata2,(FFT_FLOAT*)plan->cudata,plan->nfast,plan->nmid,plan->nslow);
  }

  cufftResult retvalc;
  if (flag == -1)
    retvalc = cufft(plan->plan_3d, plan->cudata, plan->cudata2, CUFFT_FORWARD);
  else
    retvalc = cufft(plan->plan_3d, plan->cudata, plan->cudata2, CUFFT_INVERSE);

  if (retvalc != CUFFT_SUCCESS) {
    printf("ErrorCUFFT: %i\n",retvalc);
    exit(EXIT_FAILURE);
  }

  FFTsyncthreads();
#endif
}
/* ----------------------------------------------------------------------
Create plan for performing a 3d FFT
Arguments:
comm MPI communicator for the P procs which own the data
nfast,nmid,nslow size of global 3d matrix
in_ilo,in_ihi input bounds of data I own in fast index
in_jlo,in_jhi input bounds of data I own in mid index
in_klo,in_khi input bounds of data I own in slow index
out_ilo,out_ihi output bounds of data I own in fast index
out_jlo,out_jhi output bounds of data I own in mid index
out_klo,out_khi output bounds of data I own in slow index
scaled 0 = no scaling of result, 1 = scaling
permute permutation in storage order of indices on output
0 = no permutation
1 = permute once = mid->fast, slow->mid, fast->slow
2 = permute twice = slow->fast, fast->mid, mid->slow
nbuf returns size of internal storage buffers used by FFT
------------------------------------------------------------------------- */
struct fft_plan_3d *fft_3d_create_plan_cuda(
MPI_Comm comm, int nfast, int nmid, int nslow,
int in_ilo, int in_ihi, int in_jlo, int in_jhi,
int in_klo, int in_khi,
int out_ilo, int out_ihi, int out_jlo, int out_jhi,
int out_klo, int out_khi,
int scaled, int permute, int *nbuf,bool ainit)
{
#ifdef FFT_CUFFT
struct fft_plan_3d *plan;
int me,nprocs;
int i,num,flag,remapflag,fftflag;
int first_ilo,first_ihi,first_jlo,first_jhi,first_klo,first_khi;
int second_ilo,second_ihi,second_jlo,second_jhi,second_klo,second_khi;
int third_ilo,third_ihi,third_jlo,third_jhi,third_klo,third_khi;
int out_size,first_size,second_size,third_size,copy_size,scratch_size;
int np1,np2,ip1,ip2;
int list[50];
// system specific variables
// query MPI info
MPI_Comm_rank(comm,&me);
MPI_Comm_size(comm,&nprocs);
#ifndef FFT_CUFFT
error->all("ERROR: Trying to use cuda fft without FFT_CUFFT set. Recompile with make option 'cufft=1'.");
#endif
// compute division of procs in 2 dimensions not on-processor
bifactor_cuda(nprocs,&np1,&np2);
ip1 = me % np1;
ip2 = me/np1;
// in case of CUDA FFT every proc does the full FFT in order to avoid data transfers (the problem is other wise heavily bandwidth limited)
int ip1out = ip1;
int ip2out = ip2;
int np1out = np1;
int np2out = np2;
ip1 = 0;
ip2 = 0;
np1 = 1;
np2 = 1;
// allocate memory for plan data struct
plan = (struct fft_plan_3d *) malloc(sizeof(struct fft_plan_3d));
if (plan == NULL) return NULL;
plan->init=ainit;
// remap from initial distribution to layout needed for 1st set of 1d FFTs
// not needed if all procs own entire fast axis initially
// first indices = distribution after 1st set of FFTs
if (in_ilo == 0 && in_ihi == nfast-1)
flag = 0;
else
flag = 1;
if(nprocs>1)flag=1;
MPI_Allreduce(&flag,&remapflag,1,MPI_INT,MPI_MAX,comm);
if (remapflag == 0) {
first_ilo = in_ilo;
first_ihi = in_ihi;
first_jlo = in_jlo;
first_jhi = in_jhi;
first_klo = in_klo;
first_khi = in_khi;
plan->pre_plan = NULL;
}
else {
first_ilo = 0;
first_ihi = nfast - 1;
first_jlo = ip1*nmid/np1;
first_jhi = (ip1+1)*nmid/np1 - 1;
first_klo = ip2*nslow/np2;
first_khi = (ip2+1)*nslow/np2 - 1;
int members=2;
if(plan->init) members=1;
plan->pre_plan =
remap_3d_create_plan(comm,in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi,
first_ilo,first_ihi,first_jlo,first_jhi,
first_klo,first_khi,
members,0,0,2);
if (plan->pre_plan == NULL) return NULL;
}
// 1d FFTs along fast axis
plan->length1 = nfast;
plan->total1 = nfast * nmid * nslow;
// remap from 1st to 2nd FFT
// choose which axis is split over np1 vs np2 to minimize communication
// second indices = distribution after 2nd set of FFTs
second_ilo = ip1*nfast/np1;
second_ihi = (ip1+1)*nfast/np1 - 1;
second_jlo = 0;
second_jhi = nmid - 1;
second_klo = ip2*nslow/np2;
second_khi = (ip2+1)*nslow/np2 - 1;
plan->mid1_plan =
remap_3d_create_plan(comm,
first_ilo,first_ihi,first_jlo,first_jhi,
first_klo,first_khi,
second_ilo,second_ihi,second_jlo,second_jhi,
second_klo,second_khi,
2,1,0,2);
if (plan->mid1_plan == NULL) return NULL;
// 1d FFTs along mid axis
plan->length2 = nmid;
plan->total2 = nfast * nmid * nslow;
// remap from 2nd to 3rd FFT
// if final distribution is permute=2 with all procs owning entire slow axis
// then this remapping goes directly to final distribution
// third indices = distribution after 3rd set of FFTs
flag=1;
MPI_Allreduce(&flag,&remapflag,1,MPI_INT,MPI_MAX,comm);
if (remapflag == 0) {
third_ilo = out_ilo;
third_ihi = out_ihi;
third_jlo = out_jlo;
third_jhi = out_jhi;
third_klo = out_klo;
third_khi = out_khi;
}
else {
third_ilo = ip1*nfast/np1;
third_ihi = (ip1+1)*nfast/np1 - 1;
third_jlo = ip2*nmid/np2;
third_jhi = (ip2+1)*nmid/np2 - 1;
third_klo = 0;
third_khi = nslow - 1;
}
plan->mid2_plan =
remap_3d_create_plan(comm,
second_jlo,second_jhi,second_klo,second_khi,
second_ilo,second_ihi,
third_jlo,third_jhi,third_klo,third_khi,
third_ilo,third_ihi,
2,1,0,2);
if (plan->mid2_plan == NULL) return NULL;
// 1d FFTs along slow axis
plan->length3 = nslow;
plan->total3 = nfast * nmid * nslow;
// remap from 3rd FFT to final distribution
// not needed if permute = 2 and third indices = out indices on all procs
flag=1;
MPI_Allreduce(&flag,&remapflag,1,MPI_INT,MPI_MAX,comm);
if (remapflag == 0)
plan->post_plan = NULL;
else {
plan->post_plan =
remap_3d_create_plan(comm,
third_klo,third_khi,third_ilo,third_ihi,
third_jlo,third_jhi,
out_klo,out_khi,out_ilo,out_ihi,
out_jlo,out_jhi,
2,(permute+1)%3,0,2);
if (plan->post_plan == NULL) return NULL;
}
// configure plan memory pointers and allocate work space
// out_size = amount of memory given to FFT by user
// first/second/third_size = amount of memory needed after pre,mid1,mid2 remaps
// copy_size = amount needed internally for extra copy of data
// scratch_size = amount needed internally for remap scratch space
// for each remap:
// out space used for result if big enough, else require copy buffer
// accumulate largest required remap scratch space
out_size = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * (out_khi-out_klo+1);
first_size = (first_ihi-first_ilo+1) * (first_jhi-first_jlo+1) *
(first_khi-first_klo+1);
second_size = (second_ihi-second_ilo+1) * (second_jhi-second_jlo+1) *
(second_khi-second_klo+1);
third_size = (third_ihi-third_ilo+1) * (third_jhi-third_jlo+1) *
(third_khi-third_klo+1);
plan->ihi_out=out_ihi;
plan->ilo_out=out_ilo;
plan->jhi_out=out_jhi;
plan->jlo_out=out_jlo;
plan->khi_out=out_khi;
plan->klo_out=out_klo;
copy_size = 0;
scratch_size = 0;
if (plan->pre_plan) {
if (first_size <= out_size)
plan->pre_target = 0;
else {
plan->pre_target = 1;
copy_size = MAX(copy_size,first_size);
}
scratch_size = MAX(scratch_size,first_size);
}
if (plan->mid1_plan) {
if (second_size <= out_size)
plan->mid1_target = 0;
else {
plan->mid1_target = 1;
copy_size = MAX(copy_size,second_size);
}
scratch_size = MAX(scratch_size,second_size);
}
if (plan->mid2_plan) {
if (third_size <= out_size)
plan->mid2_target = 0;
else {
plan->mid2_target = 1;
copy_size = MAX(copy_size,third_size);
}
scratch_size = MAX(scratch_size,third_size);
}
if (plan->post_plan)
scratch_size = MAX(scratch_size,out_size);
*nbuf = copy_size + scratch_size;
if (copy_size) {
plan->copy = (FFT_DATA *) malloc(copy_size*sizeof(FFT_DATA));
if (plan->copy == NULL) return NULL;
}
else plan->copy = NULL;
if (scratch_size) {
plan->scratch = (FFT_DATA *) malloc(scratch_size*sizeof(FFT_DATA));
if (plan->scratch == NULL) return NULL;
}
else plan->scratch = NULL;
// system specific pre-computation of 1d FFT coeffs
// and scaling normalization
cufftResult retvalc;
int nfft = (in_ihi-in_ilo+1) * (in_jhi-in_jlo+1) *
(in_khi-in_klo+1);
int nfft_brick = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) *
(out_khi-out_klo+1);
int nfft_both = MAX(nfft,nfft_brick);
nfft_both=nfast*nmid*nslow;
plan->cudatasize=nfft_both*sizeof(FFT_DATA);
//retvalc=cufftPlan1d(&(plan->plan_fast), nfast, CUFFT_PLAN,plan->total1/nfast);
//if(retvalc!=CUFFT_SUCCESS) printf("ErrorCUFFT1: %i\n",retvalc);
plan->nfast=nfast;
//retvalc=cufftPlan1d(&(plan->plan_mid), nmid, CUFFT_PLAN,plan->total2/nmid);
//if(retvalc!=CUFFT_SUCCESS) printf("ErrorCUFFT2: %i\n",retvalc);
plan->nmid=nmid;
//retvalc=cufftPlan1d(&(plan->plan_slow), nslow, CUFFT_PLAN,plan->total3/nslow);
//if(retvalc!=CUFFT_SUCCESS) printf("ErrorCUFFT3: %i\n",retvalc);
plan->nslow=nslow;
retvalc=cufftPlan3d(&(plan->plan_3d), nslow,nmid,nfast, CUFFT_PLAN);
if(retvalc!=CUFFT_SUCCESS) printf("ErrorCUFFT3: %i\n",retvalc);
plan->nprocs=nprocs;
plan->me=me;
if (scaled == 0)
plan->scaled = 0;
else {
plan->scaled = 1;
plan->norm = 1.0/(nfast*nmid*nslow);
plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) *
(out_khi-out_klo+1);
}
plan->coretime=0;
plan->iterate=0;
plan->ffttime=0;
return plan;
#endif
}
/* ----------------------------------------------------------------------
Destroy a 3d fft plan
------------------------------------------------------------------------- */
void fft_3d_destroy_plan_cuda(struct fft_plan_3d *plan)
{
#ifdef FFT_CUFFT
  // a plan that was never created owns nothing
  if (plan == NULL) return;

  // each remap sub-plan is optional; destroy only the ones that exist
  if (plan->pre_plan) remap_3d_destroy_plan(plan->pre_plan);
  if (plan->mid1_plan) remap_3d_destroy_plan(plan->mid1_plan);
  if (plan->mid2_plan) remap_3d_destroy_plan(plan->mid2_plan);
  if (plan->post_plan) remap_3d_destroy_plan(plan->post_plan);

  // host work buffers; free(NULL) is a no-op so no guard is needed
  free(plan->copy);
  free(plan->scratch);

  // only the 3d cuFFT handle is created together with the plan
  // (the per-axis plan_fast/plan_mid/plan_slow handles are never created)
  // the cudata/cudata2 pointers are not released here
  cufftDestroy(plan->plan_3d);

  free(plan);
#endif
}
/* ----------------------------------------------------------------------
recursively divide n into small factors, return them in list
------------------------------------------------------------------------- */
// Split n into factors drawn from {2,3,5,7,11,13}, smallest first.
//   n     value to factor
//   num   running count of factors; incremented once per factor written
//         (the caller is expected to have initialised it)
//   list  output array; factors are written to consecutive entries
// Whatever remains once none of the small primes divides it is stored as a
// single final entry, so that entry may be composite (e.g. 289 = 17*17).
// n == 1 produces no factors.  n < 1 has no factorization and also produces
// none (n == 0 used to recurse without end and overrun list).
void factor_cuda(int n, int *num, int *list)
{
  static const int small_primes[] = {2, 3, 5, 7, 11, 13};
  static const int nprimes = sizeof(small_primes) / sizeof(small_primes[0]);

  if (n < 1) return;

  while (n != 1) {
    // pick the smallest of the small primes that divides n
    int divisor = 0;
    for (int i = 0; i < nprimes; i++) {
      if (n % small_primes[i] == 0) {
        divisor = small_primes[i];
        break;
      }
    }

    // nothing divides: keep the remainder as the last factor and stop
    if (divisor == 0) {
      *list = n;
      (*num)++;
      return;
    }

    *list++ = divisor;
    (*num)++;
    n /= divisor;
  }
}
/* ----------------------------------------------------------------------
divide n into 2 factors of as equal size as possible
------------------------------------------------------------------------- */
// Write to *factor1 and *factor2 the divisor pair of n that is closest to
// sqrt(n), with *factor1 <= *factor2.  The search walks downwards from
// floor(sqrt(n)); if no candidate is tried (n == 0) the outputs are left
// untouched.
void bifactor_cuda(int n, int *factor1, int *factor2)
{
  int candidate = static_cast<int> (sqrt((double) n));

  while (candidate > 0) {
    if (n % candidate == 0) {
      *factor1 = candidate;
      *factor2 = n / candidate;
      return;
    }
    candidate--;
  }
}
/* ----------------------------------------------------------------------
perform just the 1d FFTs needed by a 3d FFT, no data movement
used for timing purposes
Arguments:
in starting address of input data on this proc, all set to 0.0
nsize size of in
flag 1 for forward FFT, -1 for inverse FFT
plan plan returned by previous call to fft_3d_create_plan
------------------------------------------------------------------------- */
// Timing helper: copy data to the device buffer plan->cudata, run one
// in-place cuFFT transform with the plan's 3d handle, and copy the result back.
//   data   host buffer of nsize FFT_DATA elements
//   nsize  number of elements available in data
//   flag   -1 selects CUFFT_FORWARD, any other value CUFFT_INVERSE
//          (same mapping as the former per-axis code)
//   plan   plan whose plan_3d handle and cudata pointer are already set
// Failures are reported with printf, as the plan creation code does.
// Compiles to an empty function when FFT_CUFFT is not defined.
void fft_1d_only_cuda(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan)
{
#ifdef FFT_CUFFT
  // never move more bytes than the caller's nsize-element buffer holds
  size_t nbytes = plan->cudatasize;
  const size_t navail = (nsize > 0) ? ((size_t) nsize) * sizeof(FFT_DATA) : 0;
  if (navail < nbytes) nbytes = navail;

  // destination is the device pointer itself, not the address of the
  // plan field that stores it
  cudaError_t cerr = cudaMemcpy((void*) plan->cudata, (const void*) data,
                                nbytes, cudaMemcpyHostToDevice);
  if (cerr != cudaSuccess)
    printf("ErrorCUDA memcpy to device: %s\n", cudaGetErrorString(cerr));

  const int direction = (flag == -1) ? CUFFT_FORWARD : CUFFT_INVERSE;
  cufftResult retvalc = cufft(plan->plan_3d, plan->cudata, plan->cudata, direction);
  if (retvalc != CUFFT_SUCCESS) printf("ErrorCUFFT exec: %i\n", retvalc);

  cerr = cudaMemcpy((void*) data, (const void*) plan->cudata,
                    nbytes, cudaMemcpyDeviceToHost);
  if (cerr != cudaSuccess)
    printf("ErrorCUDA memcpy to host: %s\n", cudaGetErrorString(cerr));

  // no scaling is applied to the result
#endif
}

148
src/USER-CUDA/fft3d_cuda.h Normal file
View File

@ -0,0 +1,148 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
// User-settable FFT precision
// FFT_PRECISION = 1 is single-precision complex (4-byte real, 4-byte imag)
// FFT_PRECISION = 2 is double-precision complex (8-byte real, 8-byte imag)
#include "cuda_precision.h"
//#define FFT_PRECISION 2
// -------------------------------------------------------------------------
// Data types for single-precision complex
// Single-precision section, active when FFT_PRECISION_CU == 1
// (FFT_PRECISION_CU is presumably provided by cuda_precision.h - confirm)
#if FFT_PRECISION_CU == 1
#ifdef FFT_CUFFT
#include "cuda_runtime.h"
#include "cufft.h"
// host-side complex value: real part followed by imaginary part
typedef struct {
float re;
float im;
} FFT_DATA;
// cuFFT element types for complex and real single-precision data
typedef cufftComplex cufftData;
typedef cufftReal cufftDataInit;
// precision-neutral names for the cuFFT exec functions and plan types
#define cufft cufftExecC2C
#define cufftinit cufftExecR2C
#define CUFFT_PLAN CUFFT_C2C
#define CUFFT_PLAN_INIT CUFFT_R2C
#else
// without cuFFT only the host-side complex type is defined
typedef struct {
float re;
float im;
} FFT_DATA;
#endif
#endif
// -------------------------------------------------------------------------
// Data types for double-precision complex
// Double-precision section, active when FFT_PRECISION_CU == 2
// NOTE(review): unlike the single-precision section there is no #else
// branch, so this header defines no FFT_DATA when FFT_CUFFT is off -
// confirm that such builds get FFT_DATA from elsewhere
#if FFT_PRECISION_CU == 2
#ifdef FFT_CUFFT
#include "cuda_runtime.h"
#include "cufft.h"
// cuFFT element types for complex and real double-precision data
typedef cufftDoubleComplex cufftData;
typedef cufftDoubleReal cufftDataInit;
// host-side complex value: real part followed by imaginary part
typedef struct {
double re;
double im;
} FFT_DATA;
// precision-neutral names for the cuFFT exec functions and plan types
#define cufft cufftExecZ2Z
#define cufftinit cufftExecD2Z
#define CUFFT_PLAN CUFFT_Z2Z
#define CUFFT_PLAN_INIT CUFFT_D2Z
#endif
#endif
// -------------------------------------------------------------------------
// details of how to do a 3d FFT
// Everything needed to run one distributed 3d FFT: the remap sub-plans,
// host work buffers, per-stage sizes, scaling data and (with FFT_CUFFT)
// the cuFFT handles and device buffer pointers.
struct fft_plan_3d {
struct remap_plan_3d *pre_plan; // remap from input -> 1st FFTs
struct remap_plan_3d *mid1_plan; // remap from 1st -> 2nd FFTs
struct remap_plan_3d *mid2_plan; // remap from 2nd -> 3rd FFTs
struct remap_plan_3d *post_plan; // remap from 3rd FFTs -> output
FFT_DATA *copy; // memory for remap results (if needed)
FFT_DATA *scratch; // scratch space for remaps
int total1,total2,total3; // # of 1st,2nd,3rd FFTs (times length)
int length1,length2,length3; // length of 1st,2nd,3rd FFTs
int pre_target; // where to put remap results
int mid1_target,mid2_target;
int scaled; // whether to scale FFT results
int normnum; // # of values to rescale
double norm; // normalization factor for rescaling
// the next three are set to 0 when the plan is created
// (presumably timing/iteration accumulators - confirm against fft_3d_cuda)
double coretime;
double ffttime;
int iterate;
// system specific 1d FFT info
#ifdef FFT_CUFFT
//CUdeviceptr cudata;
// buffers handed in through FFT3dCuda::set_cudata(); cudata is the buffer
// the cuFFT exec calls transform in place
cufftData* cudata;
cufftData* cudata2;
// nfast*nmid*nslow*sizeof(FFT_DATA), i.e. bytes of the full grid
unsigned int cudatasize;
// per-axis handles; their creation is currently commented out
cufftHandle plan_fast;
cufftHandle plan_mid;
cufftHandle plan_slow;
// handle created with cufftPlan3d and destroyed with the plan
cufftHandle plan_3d;
// grid extents passed to plan creation
int nfast;
int nmid;
int nslow;
// output index bounds passed to plan creation
int ihi_out,ilo_out,jhi_out,jlo_out,khi_out,klo_out;
// copied from the me/nprocs values used during plan creation
// (presumably MPI rank and communicator size - confirm)
int me,nprocs;
#endif
// read during plan creation to choose the pre-remap `members` argument
int init;
};
// function prototypes
// release a plan and the sub-plans/buffers it holds
void fft_3d_destroy_plan_cuda(struct fft_plan_3d *);
// (n, factor count, factor list): split n into small factors
void factor_cuda(int, int *, int *);
// (n, factor1, factor2): split n into two factors of near-equal size
void bifactor_cuda(int, int *, int *);
// (data, nsize, flag, plan): FFT work only, used for timing
void fft_1d_only_cuda(FFT_DATA *, int, int, struct fft_plan_3d *);
// (in, out, flag, plan): 3d FFT entry points
void fft_3d_cudaA(FFT_DATA *, FFT_DATA *, int, struct fft_plan_3d *);
void fft_3d_cuda(FFT_DATA *, FFT_DATA *, int, struct fft_plan_3d *);
// (comm, nfast, nmid, nslow, 6 input bounds, 6 output bounds,
//  scaled, permute, nbuf, init); returns NULL on failure
struct fft_plan_3d *fft_3d_create_plan_cuda(MPI_Comm, int, int, int,
int, int, int, int, int, int, int, int, int, int, int, int,
int, int, int *,bool init);

View File

@ -0,0 +1,111 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "mpi.h"
#include "fft3d_wrap_cuda.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Build the underlying 3d FFT plan: the cuFFT variant when FFT_CUFFT is
// defined, otherwise the regular host plan (which does not take `init`).
// Aborts through error->one() if no plan could be created.
FFT3dCuda::FFT3dCuda(LAMMPS *lmp, MPI_Comm comm, int nfast, int nmid, int nslow,
                     int in_ilo, int in_ihi, int in_jlo, int in_jhi,
                     int in_klo, int in_khi,
                     int out_ilo, int out_ihi, int out_jlo, int out_jhi,
                     int out_klo, int out_khi,
                     int scaled, int permute, int *nbuf,bool init) : Pointers(lmp)
{
#if defined(FFT_CUFFT)
  plan = fft_3d_create_plan_cuda(comm,nfast,nmid,nslow,
                                 in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi,
                                 out_ilo,out_ihi,out_jlo,out_jhi,out_klo,out_khi,
                                 scaled,permute,nbuf,init);
#else
  plan = fft_3d_create_plan(comm,nfast,nmid,nslow,
                            in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi,
                            out_ilo,out_ihi,out_jlo,out_jhi,out_klo,out_khi,
                            scaled,permute,nbuf);
#endif

  if (!plan) error->one("Could not create 3d FFT plan");
}
/* ---------------------------------------------------------------------- */
// Release the plan with the destroy routine that matches how it was built.
FFT3dCuda::~FFT3dCuda()
{
#if defined(FFT_CUFFT)
  fft_3d_destroy_plan_cuda(plan);
#else
  fft_3d_destroy_plan(plan);
#endif
}
/* ---------------------------------------------------------------------- */
void FFT3dCuda::compute(double *in, double *out, int flag)
{
#ifdef FFT_CUFFT
fft_3d_cuda((FFT_DATA *) in,(FFT_DATA *) out,flag,plan);
#endif
#ifndef FFT_CUFFT
fft_3d((FFT_DATA *) in,(FFT_DATA *) out,flag,plan);
#endif
}
/* ---------------------------------------------------------------------- */
void FFT3dCuda::timing1d(double *in, int nsize, int flag)
{
#ifdef FFT_CUFFT
fft_1d_only_cuda((FFT_DATA *) in,nsize,flag,plan);
#endif
#ifndef FFT_CUFFT
fft_1d_only((FFT_DATA *) in,nsize,flag,plan);
#endif
}
#ifdef FFT_CUFFT
// Store the two caller-provided buffers in the plan; no copy is made and
// ownership is not taken here.
void FFT3dCuda::set_cudata(void* cudata,void* cudata2)
{
  plan->cudata = static_cast<cufftData*>(cudata);
  plan->cudata2 = static_cast<cufftData*>(cudata2);
}
#endif

View File

@ -0,0 +1,68 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef FFT3D_WRAP_CUDA_H_
#define FFT3D_WRAP_CUDA_H_
#include "pointers.h"
#ifdef FFT_CUFFT
#include "fft3d_cuda.h"
#endif
#ifndef FFT_CUFFT
#include "fft3d.h"
#endif
namespace LAMMPS_NS {
// Thin wrapper that owns one fft_plan_3d and forwards to the cuFFT-based
// routines when FFT_CUFFT is defined, otherwise to the regular host ones.
class FFT3dCuda : protected Pointers {
public:
// (lmp, comm, nfast, nmid, nslow, 6 input bounds, 6 output bounds,
//  scaled, permute, nbuf, init)
FFT3dCuda(class LAMMPS *, MPI_Comm,int,int,int,int,int,int,int,int,int,
int,int,int,int,int,int,int,int,int *,bool);
~FFT3dCuda();
// (in, out, flag): full 3d FFT
void compute(double *, double *, int);
// (in, nsize, flag): FFT work only, for timing
void timing1d(double *, int, int);
#ifdef FFT_CUFFT
// hand the plan the two buffers it should work on
void set_cudata(void* cudata,void* cudata2);
#endif
private:
// created in the constructor, destroyed in the destructor
struct fft_plan_3d *plan;
};
}
#endif /*FFT3D_WRAP_CUDA_H_*/

View File

@ -0,0 +1,190 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include <cstring>
#include <cstdlib>
#include "fix_addforce_cuda.h"
#include "fix_addforce_cuda_cu.h"
#include "atom.h"
#include "update.h"
#include "respa.h"
#include "error.h"
#include "domain.h"
#include "cuda.h"
#include "memory.h"
#include "cuda_modify_flags.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Parse "fix ID group addforce/cuda fx fy fz [region ID]".
// arg[3..5] are the force components.  The region keyword is parsed but then
// rejected, since this style does not support it.
FixAddForceCuda::FixAddForceCuda(LAMMPS *lmp, int narg, char **arg) :
  Fix(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (!cuda)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  if (narg < 6) error->all("Illegal fix addforce/cuda command");

  // this fix provides a global scalar and a global 3-vector
  scalar_flag = 1;
  extscalar = 1;
  vector_flag = 1;
  size_vector = 3;
  extvector = 1;
  global_freq = 1;

  xvalue = atof(arg[3]);
  yvalue = atof(arg[4]);
  zvalue = atof(arg[5]);

  // optional keyword/value pairs
  iregion = -1;
  for (int iarg = 6; iarg < narg; ) {
    if (strcmp(arg[iarg],"region") == 0) {
      if (iarg+2 > narg) error->all("Illegal fix addforce/cuda command");
      iregion = domain->find_region(arg[iarg+1]);
      if (iregion == -1) error->all("Fix addforce/cuda region ID does not exist");
      iarg += 2;
    } else error->all("Illegal fix addforce/cuda command");
  }

  if (iregion != -1)
    error->all("Error: fix addforce/cuda does not currently support 'region' option");

  // accumulators start empty; the device mirror is created in init()
  force_flag = 0;
  for (int k = 0; k < 4; k++) foriginal[k] = 0.0;
  cu_foriginal = NULL;
}
/* ---------------------------------------------------------------------- */
// Bitmask of the callbacks this fix takes part in.
int FixAddForceCuda::setmask()
{
  return POST_FORCE_CUDA | THERMO_ENERGY_CUDA | POST_FORCE_RESPA |
         MIN_POST_FORCE_CUDA;
}
/* ---------------------------------------------------------------------- */
void FixAddForceCuda::init()
{
if(not cu_foriginal)
cu_foriginal = new cCudaData<double, F_FLOAT, x> (foriginal,4);
if (strcmp(update->integrate_style,"respa") == 0)
nlevels_respa = ((Respa *) update->integrate)->nlevels;
}
/* ---------------------------------------------------------------------- */
void FixAddForceCuda::setup(int vflag)
{
MYDBG( printf("# CUDA: FixAddForceCuda::setup\n"); )
if (strcmp(update->integrate_style,"verlet") == 0)
{
Cuda_FixAddForceCuda_Init(&cuda->shared_data);
cuda->cu_f->upload();
post_force(vflag);
cuda->cu_f->download();
}
else {
((Respa *) update->integrate)->copy_flevel_f(nlevels_respa-1);
cuda->cu_f->download();
post_force_respa(vflag,nlevels_respa-1,0);
cuda->cu_f->upload();
((Respa *) update->integrate)->copy_f_flevel(nlevels_respa-1);
}
MYDBG( printf("# CUDA: FixAddForceCuda::setup done\n"); )
}
/* ---------------------------------------------------------------------- */
// Setup hook for minimization: just applies the force once via post_force().
void FixAddForceCuda::min_setup(int vflag)
{
post_force(vflag);
}
/* ---------------------------------------------------------------------- */
// Add the constant force to the atoms of this fix's group on the device and
// fetch the accumulated totals.  vflag is not used here.
void FixAddForceCuda::post_force(int vflag)
{
MYDBG( printf("# CUDA: FixAddForceCuda::postforce start\n"); )
// new totals are coming: the next compute_scalar/compute_vector must re-reduce
force_flag = 0;
// clear the device accumulator before the call below uses it
cu_foriginal->memset_device(0);
Cuda_FixAddForceCuda_PostForce(&cuda->shared_data, groupbit, xvalue, yvalue,zvalue,(F_FLOAT*) cu_foriginal->dev_data());
// presumably copies the device totals back into the host array foriginal - confirm in cCudaData
cu_foriginal->download();
}
/* ---------------------------------------------------------------------- */
// rRESPA hook: the force is only added on the outermost level; iloop is unused.
void FixAddForceCuda::post_force_respa(int vflag, int ilevel, int iloop)
{
  const int outermost = nlevels_respa-1;
  if (ilevel != outermost) return;
  post_force(vflag);
}
/* ---------------------------------------------------------------------- */
// Minimizer hook: identical to the regular post_force() step.
void FixAddForceCuda::min_post_force(int vflag)
{
post_force(vflag);
}
/* ----------------------------------------------------------------------
potential energy of added force
------------------------------------------------------------------------- */
// Return entry 0 of the totals summed over all procs.  The MPI reduction is
// done at most once between two post_force() calls (force_flag tracks this).
double FixAddForceCuda::compute_scalar()
{
  const bool already_summed = (force_flag != 0);
  if (!already_summed) {
    MPI_Allreduce(foriginal,foriginal_all,4,MPI_DOUBLE,MPI_SUM,world);
    force_flag = 1;
  }

  return foriginal_all[0];
}
/* ----------------------------------------------------------------------
return components of total force on fix group before force was changed
------------------------------------------------------------------------- */
// Return component n of the summed force, stored at index n+1 of the totals
// (index 0 is what compute_scalar() returns).  n is not range-checked.
// The MPI reduction is done at most once between two post_force() calls.
double FixAddForceCuda::compute_vector(int n)
{
  const bool already_summed = (force_flag != 0);
  if (!already_summed) {
    MPI_Allreduce(foriginal,foriginal_all,4,MPI_DOUBLE,MPI_SUM,world);
    force_flag = 1;
  }

  const int slot = n+1;
  return foriginal_all[slot];
}

View File

@ -0,0 +1,64 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(addforce/cuda,FixAddForceCuda)
#else
#ifndef LMP_FIX_ADD_FORCE_CUDA_H
#define LMP_FIX_ADD_FORCE_CUDA_H
#include "fix.h"
#include "cuda_data.h"
namespace LAMMPS_NS {
// Fix style addforce/cuda: adds a constant force to the atoms of a group
// on the device.
// NOTE(review): no destructor is declared, so the cCudaData object that
// init() allocates with new is not released by this class - confirm intended.
class FixAddForceCuda : public Fix {
public:
FixAddForceCuda(class LAMMPS *, int, char **);
int setmask();
void init();
void setup(int);
void min_setup(int);
void post_force(int);
void post_force_respa(int, int, int);
void min_post_force(int);
// entry 0 of the summed totals
double compute_scalar();
// entries 1..3 of the summed totals, selected by argument 0..2
double compute_vector(int);
private:
// taken from lmp->cuda in the constructor; must be non-NULL
class Cuda *cuda;
// region index from the optional keyword; any value other than -1 is rejected
int iregion;
// force components parsed from arg[3..5]
double xvalue,yvalue,zvalue;
// per-proc totals and their sum over all procs
double foriginal[4],foriginal_all[4];
// wrapper around foriginal, created in init()
cCudaData<double , F_FLOAT , x>* cu_foriginal;
// 0 after post_force(), 1 once foriginal_all has been reduced
int force_flag;
// set in init() for rRESPA runs only
int nlevels_respa;
};
}
#endif
#endif

View File

@ -0,0 +1,229 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include "mpi.h"
#include <cstring>
#include <cstdlib>
#include "fix_aveforce_cuda.h"
#include "fix_aveforce_cuda_cu.h"
#include "atom.h"
#include "update.h"
#include "respa.h"
#include "error.h"
#include "domain.h"
#include "cuda.h"
#include "cuda_modify_flags.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Parse "fix ID group aveforce/cuda fx fy fz [region ID]".
// Each of arg[3..5] is either a number (force added to the average in that
// dimension) or the literal NULL (that dimension is left alone).
// The region keyword is parsed but then rejected, since this style does not
// support it.
FixAveForceCuda::FixAveForceCuda(LAMMPS *lmp, int narg, char **arg) :
  Fix(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if(cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  // allow trailing keywords (as FixAddForceCuda does) so they reach the
  // specific diagnostics below; every such input is still rejected
  if (narg < 6) error->all("Illegal fix aveforce command");

  vector_flag = 1;
  size_vector = 3;
  global_freq = 1;
  extvector = 1;

  // start from 0: post_force() adds all three values to the average even
  // for a NULL dimension, which would otherwise read an unset member
  xvalue = yvalue = zvalue = 0.0;
  xflag = yflag = zflag = 1;

  if (strcmp(arg[3],"NULL") == 0) xflag = 0;
  else xvalue = atof(arg[3]);
  if (strcmp(arg[4],"NULL") == 0) yflag = 0;
  else yvalue = atof(arg[4]);
  if (strcmp(arg[5],"NULL") == 0) zflag = 0;
  else zvalue = atof(arg[5]);

  // optional args

  iregion = -1;

  int iarg = 6;
  while (iarg < narg) {
    if (strcmp(arg[iarg],"region") == 0) {
      if (iarg+2 > narg) error->all("Illegal fix aveforce command");
      iregion = domain->find_region(arg[iarg+1]);
      if (iregion == -1) error->all("Fix aveforce region ID does not exist");
      iarg += 2;
    } else error->all("Illegal fix aveforce command");
  }

  if(iregion!=-1) error->all("Error: fix aveforce/cuda does not currently support 'region' option");

  // totals start empty; the device mirror is created in init()
  foriginal_all[0] = foriginal_all[1] = foriginal_all[2] = foriginal_all[3] = 0.0;
  foriginal[0] = foriginal[1] = foriginal[2] = foriginal[3] = 0.0;
  cu_foriginal = NULL;
}
/* ---------------------------------------------------------------------- */
// Bitmask of the callbacks this fix takes part in.
int FixAveForceCuda::setmask()
{
  return POST_FORCE_CUDA | POST_FORCE_RESPA | MIN_POST_FORCE_CUDA;
}
/* ---------------------------------------------------------------------- */
// Create the cCudaData wrapper for foriginal on first use and, for rRESPA
// runs, remember the number of levels.
void FixAveForceCuda::init()
{
  if(not cu_foriginal)
    cu_foriginal = new cCudaData<double, F_FLOAT, x> (foriginal,4);

  if (strcmp(update->integrate_style,"respa") == 0)
    nlevels_respa = ((Respa *) update->integrate)->nlevels;

  // the atom count of the group is not computed here; it arrives as
  // entry 3 of the force totals in post_force()
}
/* ---------------------------------------------------------------------- */
// Apply the fix once before the run starts.  For "verlet" the forces are
// uploaded, post_force() runs, and the forces are downloaded again; for any
// other integrate style every rRESPA level is processed in turn.
void FixAveForceCuda::setup(int vflag)
{
  const bool verlet_run = (strcmp(update->integrate_style,"verlet") == 0);

  if (!verlet_run) {
    cuda->cu_f->download();
    for (int level = 0; level < nlevels_respa; level++) {
      ((Respa *) update->integrate)->copy_flevel_f(level);
      post_force_respa(vflag,level,0);
      ((Respa *) update->integrate)->copy_f_flevel(level);
    }
    cuda->cu_f->upload();
    return;
  }

  Cuda_FixAveForceCuda_Init(&cuda->shared_data);
  cuda->cu_f->upload();
  post_force(vflag);
  cuda->cu_f->download();
}
/* ---------------------------------------------------------------------- */
// Setup hook for minimization: just applies the averaging once via post_force().
void FixAveForceCuda::min_setup(int vflag)
{
post_force(vflag);
}
/* ---------------------------------------------------------------------- */
// Replace the force on every atom of the group by the group average plus the
// requested extra force, in the active dimensions only.  vflag is not used.
void FixAveForceCuda::post_force(int vflag)
{
  // step 1: accumulate this proc's force totals (and atom count) on the device
  cu_foriginal->memset_device(0);
  Cuda_FixAveForceCuda_PostForce_FOrg(&cuda->shared_data, groupbit,(F_FLOAT*) cu_foriginal->dev_data());
  cu_foriginal->download();

  // step 2: sum over all procs; entry 3 carries the number of atoms
  MPI_Allreduce(foriginal,foriginal_all,4,MPI_DOUBLE,MPI_SUM,world);

  const int natoms_group = static_cast<int> (foriginal_all[3]);
  if (natoms_group == 0) return;

  // step 3: average per atom plus the requested amount
  const double fx = foriginal_all[0]/natoms_group + xvalue;
  const double fy = foriginal_all[1]/natoms_group + yvalue;
  const double fz = foriginal_all[2]/natoms_group + zvalue;

  // step 4: write the same force to every atom of the group
  Cuda_FixAveForceCuda_PostForce_Set(&cuda->shared_data, groupbit,xflag,yflag,zflag,fx,fy,fz);
}
/* ---------------------------------------------------------------------- */
void FixAveForceCuda::post_force_respa(int vflag, int ilevel, int iloop)
{
// ave + extra force on outermost level
// just ave on inner levels
if (ilevel == nlevels_respa-1) post_force(vflag);
else {
cuda->cu_f->download();
cuda->cu_mask->download();
double **f = atom->f;
int *mask = atom->mask;
int nlocal = atom->nlocal;
double foriginal[4];
foriginal[0] = foriginal[1] = foriginal[2] = foriginal[3] = 0.0;
for (int i = 0; i < nlocal; i++)
if (mask[i] & groupbit) {
foriginal[0] += f[i][0];
foriginal[1] += f[i][1];
foriginal[2] += f[i][2];
foriginal[3] += 1;
}
MPI_Allreduce(foriginal,foriginal_all,4,MPI_DOUBLE,MPI_SUM,world);
int ncount = static_cast<int> (foriginal_all[3]);
if (ncount == 0) return;
double fave[3];
fave[0] = foriginal_all[0]/ncount;
fave[1] = foriginal_all[1]/ncount;
fave[2] = foriginal_all[2]/ncount;
for (int i = 0; i < nlocal; i++)
if (mask[i] & groupbit) {
if (xflag) f[i][0] = fave[0];
if (yflag) f[i][1] = fave[1];
if (zflag) f[i][2] = fave[2];
}
cuda->cu_f->upload();
}
}
/* ---------------------------------------------------------------------- */
// Minimizer hook: identical to the regular post_force() step.
void FixAveForceCuda::min_post_force(int vflag)
{
post_force(vflag);
}
/* ----------------------------------------------------------------------
return components of total force on fix group before force was changed
------------------------------------------------------------------------- */
// Return entry n of the totals that the latest post_force() or
// post_force_respa() summed over all procs.
double FixAveForceCuda::compute_vector(int n)
{
// n is used as a direct index with no range check
return foriginal_all[n];
}

View File

@ -0,0 +1,64 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(aveforce/cuda,FixAveForceCuda)
#else
#ifndef LMP_FIX_AVE_FORCE_CUDA_H
#define LMP_FIX_AVE_FORCE_CUDA_H
#include "fix.h"
#include "cuda_data.h"
namespace LAMMPS_NS {
// Fix style aveforce/cuda: replaces the force on each atom of a group by the
// group average plus an optional extra force.
// NOTE(review): no destructor is declared, so the cCudaData object that
// init() allocates with new is not released by this class - confirm intended.
class FixAveForceCuda : public Fix {
public:
FixAveForceCuda(class LAMMPS *, int, char **);
int setmask();
void init();
void setup(int);
void min_setup(int);
void post_force(int);
void post_force_respa(int, int, int);
void min_post_force(int);
// entry n of the summed totals
double compute_vector(int);
private:
// taken from lmp->cuda in the constructor; must be non-NULL
class Cuda *cuda;
// x/y/zflag: 0 when the matching argument was NULL, else 1;
// iregion: any value other than -1 is rejected by the constructor
int xflag,yflag,zflag,iregion;
// extra force per dimension, parsed from arg[3..5]
double xvalue,yvalue,zvalue;
// totals summed over all procs: three force components, then the atom count
double foriginal_all[4];
// per-proc totals, wrapped by cu_foriginal
double foriginal[4];
// wrapper around foriginal, created in init()
cCudaData<double , F_FLOAT , x>* cu_foriginal;
// set in init() for rRESPA runs only
int nlevels_respa;
};
}
#endif
#endif

View File

@ -0,0 +1,169 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <cstring>
#include "fix_enforce2d_cuda.h"
#include "fix_enforce2d_cuda_cu.h"
#include "atom.h"
#include "update.h"
#include "domain.h"
#include "respa.h"
#include "error.h"
#include "cuda.h"
#include "cuda_modify_flags.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Constructor: requires an active Cuda object on the LAMMPS instance and
// exactly three command arguments.
FixEnforce2DCuda::FixEnforce2DCuda(LAMMPS *lmp, int narg, char **arg) :
  Fix(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  if (narg != 3) {
    error->all("Illegal fix enforce2d command");
  }
}
/* ---------------------------------------------------------------------- */
// Reports which hooks this fix takes part in: post-force (CUDA),
// post-force rRESPA and minimizer post-force (CUDA).
int FixEnforce2DCuda::setmask()
{
  int hooks = 0;
  hooks = hooks | POST_FORCE_CUDA;
  hooks = hooks | POST_FORCE_RESPA;
  hooks = hooks | MIN_POST_FORCE_CUDA;
  return hooks;
}
/* ---------------------------------------------------------------------- */
// Sanity checks before a run: rejects 3d simulations and warns for each
// per-atom rotational attribute that is present on the atom style.
void FixEnforce2DCuda::init()
{
  if (domain->dimension == 3) {
    error->all("Cannot use fix enforce2d/cuda with 3d simulation");
  }

  // one warning per rotational attribute; the wording says these are
  // handled on the cpu (see the host loops in post_force())
  if (atom->omega_flag) {
    error->warning("Enforce2d/cuda does not support omega_flag on gpu yet. Will be handled on cpu.");
  }
  if (atom->angmom_flag) {
    error->warning("Enforce2d/cuda does not support angmom_flag (angular momentum) on gpu yet. Will be handled on cpu.");
  }
  if (atom->torque_flag) {
    error->warning("Enforce2d/cuda does not support torque_flag on gpu yet. Will be handled on cpu.");
  }
}
/* ---------------------------------------------------------------------- */
// Applies the constraint once at setup time.
// verlet: initialise the CUDA side, upload f and v, run post_force(),
//         then download f and v again.
// other : loop over all rRESPA levels and apply post_force_respa() to
//         the per-level forces.
void FixEnforce2DCuda::setup(int vflag)
{
  const bool verlet = (strcmp(update->integrate_style,"verlet") == 0);

  if (verlet) {
    Cuda_FixEnforce2dCuda_Init(&cuda->shared_data);

    cuda->cu_f->upload();
    cuda->cu_v->upload();

    post_force(vflag);

    cuda->cu_f->download();
    cuda->cu_v->download();
    return;
  }

  const int nlevels = ((Respa *) update->integrate)->nlevels;
  int level = 0;
  while (level < nlevels) {
    ((Respa *) update->integrate)->copy_flevel_f(level);
    post_force_respa(vflag,level,0);
    ((Respa *) update->integrate)->copy_f_flevel(level);
    ++level;
  }
}
/* ---------------------------------------------------------------------- */
void FixEnforce2DCuda::min_setup(int vflag)
{
post_force(vflag);
}
/* ---------------------------------------------------------------------- */
// Runs the CUDA post-force routine for this group, then, on the host,
// zeroes components 0 and 1 of omega, angmom and torque for every atom
// in the group (each only if the atom style provides that attribute).
void FixEnforce2DCuda::post_force(int vflag)
{
  Cuda_FixEnforce2dCuda_PostForce(&cuda->shared_data, groupbit);

  int *mask = atom->mask;

  // restrict the host loops to the "first" group when it is this fix's group
  int natoms = atom->nlocal;
  if (igroup == atom->firstgroup) natoms = atom->nfirst;

  if (atom->omega_flag) {
    double **omega = atom->omega;
    for (int i = 0; i < natoms; i++) {
      if (!(mask[i] & groupbit)) continue;
      double *w = omega[i];
      w[0] = 0.0;
      w[1] = 0.0;
    }
  }

  if (atom->angmom_flag) {
    double **angmom = atom->angmom;
    for (int i = 0; i < natoms; i++) {
      if (!(mask[i] & groupbit)) continue;
      double *l = angmom[i];
      l[0] = 0.0;
      l[1] = 0.0;
    }
  }

  if (atom->torque_flag) {
    double **torque = atom->torque;
    for (int i = 0; i < natoms; i++) {
      if (!(mask[i] & groupbit)) continue;
      double *t = torque[i];
      t[0] = 0.0;
      t[1] = 0.0;
    }
  }
}
/* ---------------------------------------------------------------------- */
void FixEnforce2DCuda::post_force_respa(int vflag, int ilevel, int iloop)
{
post_force(vflag);
}
/* ---------------------------------------------------------------------- */
void FixEnforce2DCuda::min_post_force(int vflag)
{
post_force(vflag);
}

View File

@ -0,0 +1,55 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(enforce2d/cuda,FixEnforce2DCuda)
#else
#ifndef LMP_FIX_ENFORCE2D_CUDA_H
#define LMP_FIX_ENFORCE2D_CUDA_H
#include "fix.h"
namespace LAMMPS_NS {
// Fix registered under the style name "enforce2d/cuda".
class FixEnforce2DCuda : public Fix {
 public:
  FixEnforce2DCuda(class LAMMPS *, int, char **);

  // hook selection and pre-run checks
  int setmask();
  void init();

  // setup-time application of the constraint
  void setup(int);
  void min_setup(int);

  // per-step hooks
  void post_force(int);
  void post_force_respa(int, int, int);
  void min_post_force(int);

 private:
  // taken from lmp->cuda in the constructor
  class Cuda *cuda;
};
}
#endif
#endif

View File

@ -0,0 +1,135 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include <cstring>
#include <cstdlib>
#include "fix_freeze_cuda.h"
#include "fix_freeze_cuda_cu.h"
#include "atom.h"
#include "update.h"
#include "respa.h"
#include "error.h"
#include "cuda.h"
#include "memory.h"
#include "modify.h"
#include "cuda_modify_flags.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Constructor: validates the environment and arguments, then sets up the
// 3-component global vector output and clears the force accumulators.
FixFreezeCuda::FixFreezeCuda(LAMMPS *lmp, int narg, char **arg) :
  Fix(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  if (narg != 3) {
    error->all("Illegal fix freeze command");
  }

  if (!atom->torque_flag) {
    error->all("Fix freeze requires atom attribute torque");
  }

  // global vector with 3 entries
  vector_flag = 1;
  size_vector = 3;
  global_freq = 1;
  extvector = 1;

  // nothing accumulated or reduced yet; the device buffer is created in init()
  force_flag = 0;
  for (int k = 0; k < 3; k++) foriginal[k] = 0.0;
  cu_foriginal = NULL;
}
/* ---------------------------------------------------------------------- */
// Reports the hooks used by this fix: post-force (CUDA) and
// thermo-energy (CUDA).
int FixFreezeCuda::setmask()
{
  int hooks = 0;
  hooks = hooks | POST_FORCE_CUDA;
  hooks = hooks | THERMO_ENERGY_CUDA;
  return hooks;
}
/* ---------------------------------------------------------------------- */
// Creates the device-side mirror of foriginal (first call only) and makes
// sure at most one freeze fix is defined.
//
// This fix is registered as "freeze/cuda", so the uniqueness check has to
// count that style as well as plain "freeze"; comparing against "freeze"
// alone never matches this fix and lets duplicates through.
void FixFreezeCuda::init()
{
  // allocated only while the pointer is still NULL
  if (!cu_foriginal)
    cu_foriginal = new cCudaData<double, F_FLOAT, x> (foriginal,3);

  int count = 0;
  for (int i = 0; i < modify->nfix; i++) {
    const char *fstyle = modify->fix[i]->style;
    if (strcmp(fstyle,"freeze") == 0 || strcmp(fstyle,"freeze/cuda") == 0)
      count++;
  }
  if (count > 1) error->all("More than one fix freeze");
}
/* ---------------------------------------------------------------------- */
void FixFreezeCuda::setup(int vflag)
{
MYDBG( printf("# CUDA: FixFreezeCuda::setup\n"); )
if (strcmp(update->integrate_style,"verlet") == 0)
{
Cuda_FixFreezeCuda_Init(&cuda->shared_data);
cuda->cu_f->upload();
post_force(vflag);
cuda->cu_f->download();
}
MYDBG( printf("# CUDA: FixFreezeCuda::setup done\n"); )
}
/* ---------------------------------------------------------------------- */
/* ---------------------------------------------------------------------- */
void FixFreezeCuda::post_force(int vflag)
{
MYDBG( printf("# CUDA: FixFreezeCuda::postforce start\n"); )
force_flag = 0;
cu_foriginal->memset_device(0);
Cuda_FixFreezeCuda_PostForce(&cuda->shared_data, groupbit, (F_FLOAT*) cu_foriginal->dev_data());
cu_foriginal->download();
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
return components of total force on fix group before force was changed
------------------------------------------------------------------------- */
// Returns component n (0..2) of the per-group force total summed over
// all processors.
//
// foriginal and foriginal_all hold exactly three entries and size_vector
// is 3, so the component index maps directly onto the array index.
// Indexing with n+1 read one element past the end for n == 2 and
// returned the wrong component for n == 0 and n == 1.
double FixFreezeCuda::compute_vector(int n)
{
  // only sum across procs one time

  if (force_flag == 0) {
    MPI_Allreduce(foriginal,foriginal_all,3,MPI_DOUBLE,MPI_SUM,world);
    force_flag = 1;
  }
  return foriginal_all[n];
}

View File

@ -0,0 +1,57 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(freeze/cuda,FixFreezeCuda)
#else
#ifndef LMP_FIX_FREEZE_CUDA_H
#define LMP_FIX_FREEZE_CUDA_H
#include "fix.h"
#include "cuda_data.h"
namespace LAMMPS_NS {
class FixFreezeCuda : public Fix {
public:
FixFreezeCuda(class LAMMPS *, int, char **);
int setmask();
void init();
void setup(int);
void post_force(int);
double compute_vector(int);
private:
class Cuda *cuda;
double foriginal[3],foriginal_all[3];
cCudaData<double , F_FLOAT , x>* cu_foriginal;
int force_flag;
};
}
#endif
#endif

View File

@ -0,0 +1,181 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include "fix_gravity_cuda.h"
#include "fix_gravity_cuda_cu.h"
#include "atom.h"
#include "update.h"
#include "domain.h"
#include "respa.h"
#include "error.h"
#include "cuda.h"
#include "cuda_modify_flags.h"
using namespace LAMMPS_NS;
enum{CHUTE,SPHERICAL,GRADIENT,VECTOR};
/* ---------------------------------------------------------------------- */
// Constructor: parses "magnitude style args..." and derives the unit
// direction (xgrav,ygrav,zgrav) of the gravity vector.
//
// Accepted styles and total argument counts:
//   chute     angle                        (narg == 6)
//   spherical phi theta                    (narg == 7)
//   gradient  phi theta phigrad thetagrad  (narg == 9)
//   vector    x y z                        (narg == 8)
//
// Changes relative to the previous version:
//   - every style-specific member gets a defined value, so members not
//     used by the selected style are never left indeterminate
//   - a vector direction of zero length (in 2d: x = y = 0) is rejected
//     instead of dividing by zero and producing NaN/inf accelerations
FixGravityCuda::FixGravityCuda(LAMMPS *lmp, int narg, char **arg) :
  Fix(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if(cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  if (narg < 5) error->all("Illegal fix gravity command");

  time_depend = 1;

  // defaults for members that only some styles assign
  phi = theta = phigrad = thetagrad = 0.0;
  xdir = ydir = zdir = 0.0;

  magnitude = atof(arg[3]);

  if (strcmp(arg[4],"chute") == 0) {
    if (narg != 6) error->all("Illegal fix gravity command");
    style = CHUTE;
    phi = 0.0;
    theta = 180.0 - atof(arg[5]);
  } else if (strcmp(arg[4],"spherical") == 0) {
    if (narg != 7) error->all("Illegal fix gravity command");
    style = SPHERICAL;
    phi = atof(arg[5]);
    theta = atof(arg[6]);
  } else if (strcmp(arg[4],"gradient") == 0) {
    if (narg != 9) error->all("Illegal fix gravity command");
    style = GRADIENT;
    phi = atof(arg[5]);
    theta = atof(arg[6]);
    phigrad = atof(arg[7]);
    thetagrad = atof(arg[8]);
  } else if (strcmp(arg[4],"vector") == 0) {
    if (narg != 8) error->all("Illegal fix gravity command");
    style = VECTOR;
    xdir = atof(arg[5]);
    ydir = atof(arg[6]);
    zdir = atof(arg[7]);
  } else error->all("Illegal fix gravity command");

  double PI = 4.0*atan(1.0);
  degree2rad = PI/180.0;

  if (style == CHUTE || style == SPHERICAL || style == GRADIENT) {
    // angles are given in degrees
    if (domain->dimension == 3) {
      xgrav = sin(degree2rad * theta) * cos(degree2rad * phi);
      ygrav = sin(degree2rad * theta) * sin(degree2rad * phi);
      zgrav = cos(degree2rad * theta);
    } else {
      xgrav = sin(degree2rad * theta);
      ygrav = cos(degree2rad * theta);
      zgrav = 0.0;
    }
  } else if (style == VECTOR) {
    // normalise the user direction; in 2d only x and y contribute
    const bool three_d = (domain->dimension == 3);
    double length;
    if (three_d) length = sqrt(xdir*xdir + ydir*ydir + zdir*zdir);
    else length = sqrt(xdir*xdir + ydir*ydir);

    // written as !(x > 0) so a NaN length is rejected too
    if (!(length > 0.0))
      error->all("Fix gravity/cuda vector direction has zero length");

    xgrav = xdir/length;
    ygrav = ydir/length;
    if (three_d) zgrav = zdir/length;
    else zgrav = 0.0;
  }

  time_origin = update->ntimestep;
}
/* ---------------------------------------------------------------------- */
// This fix only takes part in the CUDA post-force hook.
int FixGravityCuda::setmask()
{
  int hooks = 0;
  hooks = hooks | POST_FORCE_CUDA;
  return hooks;
}
/* ---------------------------------------------------------------------- */
// Caches the timestep size and scales the unit gravity direction by the
// magnitude to obtain the acceleration components.
void FixGravityCuda::init()
{
  dt = update->dt;

  const double g = magnitude;
  xacc = g*xgrav;
  yacc = g*ygrav;
  zacc = g*zgrav;
}
/* ---------------------------------------------------------------------- */
void FixGravityCuda::setup(int vflag)
{
MYDBG( printf("# CUDA: FixGravityCuda::setup\n"); )
if (strcmp(update->integrate_style,"verlet") == 0)
{
Cuda_FixGravityCuda_Init(&cuda->shared_data);
cuda->cu_f->upload();
post_force(vflag);
cuda->cu_f->download();
}
else {
}
MYDBG( printf("# CUDA: FixGravityCuda::setup done\n"); )
}
/* ---------------------------------------------------------------------- */
// For the gradient style, recomputes the gravity direction from the
// number of steps since time_origin, then passes the acceleration
// components to the CUDA post-force routine.
void FixGravityCuda::post_force(int vflag)
{
  if (style == GRADIENT) {
    // theta is advanced the same way in 2d and 3d
    const double theta_now = degree2rad *
      (theta + (update->ntimestep - time_origin)*dt*thetagrad*360.0);

    if (domain->dimension != 3) {
      // 2d: zgrav is left at its current value
      xgrav = sin(theta_now);
      ygrav = cos(theta_now);
    } else {
      const double phi_now = degree2rad *
        (phi + (update->ntimestep - time_origin)*dt*phigrad*360.0);
      xgrav = sin(theta_now) * cos(phi_now);
      ygrav = sin(theta_now) * sin(phi_now);
      zgrav = cos(theta_now);
    }

    xacc = magnitude*xgrav;
    yacc = magnitude*ygrav;
    zacc = magnitude*zgrav;
  }

  MYDBG( printf("# CUDA: FixGravityCuda::postforce start\n"); )
  Cuda_FixGravityCuda_PostForce(&cuda->shared_data, groupbit, xacc,yacc,zacc);
}

View File

@ -0,0 +1,60 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(gravity/cuda,FixGravityCuda)
#else
#ifndef LMP_FIX_GRAVITY_CUDA_H
#define LMP_FIX_GRAVITY_CUDA_H
#include "fix.h"
#include "cuda_data.h"
namespace LAMMPS_NS {
// Fix registered under the style name "gravity/cuda".
class FixGravityCuda : public Fix {
 public:
  FixGravityCuda(class LAMMPS *, int, char **);
  int setmask();
  void init();
  void setup(int);
  void post_force(int);

 private:
  class Cuda *cuda;

  int style;              // one of CHUTE, SPHERICAL, GRADIENT, VECTOR

  double magnitude;       // parsed from arg[3]
  double dt;              // copy of update->dt taken in init()

  // angle inputs (converted with degree2rad before use)
  double phi;
  double theta;
  double phigrad;
  double thetagrad;

  // raw direction for the vector style
  double xdir;
  double ydir;
  double zdir;

  // unit direction and direction scaled by magnitude
  double xgrav;
  double ygrav;
  double zgrav;
  double xacc;
  double yacc;
  double zacc;

  double degree2rad;      // PI/180
  int time_origin;        // update->ntimestep at construction
};
}
#endif
#endif

File diff suppressed because it is too large Load Diff

126
src/USER-CUDA/fix_nh_cuda.h Normal file
View File

@ -0,0 +1,126 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef LMP_FIX_NH_CUDA_H
#define LMP_FIX_NH_CUDA_H
#include "fix.h"
#include "cuda_precision.h"
namespace LAMMPS_NS {
// Common base class for the CUDA thermostat/barostat fixes; FixNPTCuda and
// FixNVTCuda derive from it. Only declarations live here, the
// implementation is in a separate source file.
// Derived constructors read tstat_flag / pstat_flag after this class's
// constructor has run, and fill in id_temp / id_press and tflag / pflag.
class FixNHCuda : public Fix {
public:
FixNHCuda(class LAMMPS *, int, char **);
virtual ~FixNHCuda();
int setmask();
virtual void init();
void setup(int);
// time integration hooks (plain and rRESPA variants)
virtual void initial_integrate(int);
virtual void final_integrate();
void initial_integrate_respa(int, int, int);
void final_integrate_respa(int, int);
// scalar / vector output of the fix
double compute_scalar();
double compute_vector(int);
// restart file support
void write_restart(FILE *);
void restart(char *);
int modify_param(int, char **);
void reset_dt();
protected:
class Cuda *cuda;
int dimension,which;
double dtv,dtf,dthalf,dt4,dt8,dto;
double boltz,nktv2p,tdof;
double vol0,t0;
double t_start,t_stop;
double t_current,t_target;
double t_freq;
int tstat_flag; // 1 if control T
int pstat_flag; // 1 if control P
int pstyle,pcouple,allremap;
int p_flag[6]; // 1 if control P on this dim, 0 if not
double p_start[6],p_stop[6];
double p_freq[6],p_target[6];
double omega[6],omega_dot[6];
double omega_mass[6];
double p_current[6],dilation[6];
double drag,tdrag_factor; // drag factor on particle thermostat
double pdrag_factor; // drag factor on barostat
double factor[6]; // velocity scaling due to barostat
int kspace_flag; // 1 if KSpace invoked, 0 if not
int nrigid; // number of rigid fixes
int *rfix; // indices of rigid fixes
int nlevels_respa;
double *step_respa;
// IDs of the temperature / pressure computes; allocated by the derived
// class constructors (FixNPTCuda, FixNVTCuda)
char *id_temp,*id_press;
class Compute *temperature,*pressure;
// set to 1 by the derived constructors after they create the matching compute
int tflag,pflag;
double *eta,*eta_dot; // chain thermostat for particles
double *eta_dotdot;
double *eta_mass;
int mtchain; // length of chain
double *etap; // chain thermostat for barostat
double *etap_dot;
double *etap_dotdot;
double *etap_mass;
int mpchain; // length of chain
int mtk_flag; // 0 if using Hoover barostat
double mtk_term1,mtk_term2;
int mtchain_default_flag;
int pdim; // number of barostatted dims
double mvv_current[3]; // diagonal of KE tensor
double mtk_factor; // MTK factor
double p_freq_max; // maximum barostat frequency
double p_hydro; // hydrostatic target pressure
int nc_tchain,nc_pchain;
double factor_eta;
double sigma[6]; // scaled target stress
double fdev[6]; // deviatoric force on barostat
int deviatoric_flag; // 0 if target stress tensor is hydrostatic
double h0_inv[6]; // h_inv of reference (zero strain) box
int nreset_h0; // interval for resetting h0
// internal helpers; bodies are not visible in this header
void couple();
void couple_ke();
void remap();
void nhc_temp_integrate();
void nhc_press_integrate();
virtual void nve_x(); // may be overwritten by child classes
virtual void nve_v();
virtual void nh_v_press();
virtual void nh_v_temp();
void compute_sigma();
void compute_deviatoric();
double compute_strain_energy();
void compute_press_target();
void nh_omega_dot();
// NOTE(review): presumably a cached copy of cuda->shared_data.atom.triggerneighsq,
// as FixNVECuda keeps one under the same name - confirm in the implementation
X_FLOAT triggerneighsq;
};
}
#endif

View File

@ -0,0 +1,71 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <cstring>
#include "fix_npt_cuda.h"
#include "modify.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Constructor: requires both temperature and pressure control, then
// registers two computes for this fix:
//   <fix-ID>_temp   all temp/cuda
//   <fix-ID>_press  all pressure/cuda <fix-ID>_temp
FixNPTCuda::FixNPTCuda(LAMMPS *lmp, int narg, char **arg) :
  FixNHCuda(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  if (!tstat_flag) {
    error->all("Temperature control must be used with fix npt");
  }
  if (!pstat_flag) {
    error->all("Pressure control must be used with fix npt");
  }

  // temperature compute
  // group is "all" since pressure is always global (group all)
  // and thus its KE/temperature contribution should use group all

  const int temp_len = strlen(id) + 6;      // "_temp" plus terminator
  id_temp = new char[temp_len];
  strcpy(id_temp,id);
  strcat(id_temp,"_temp");

  char **temp_args = new char*[3];
  temp_args[0] = id_temp;
  temp_args[1] = (char *) "all";
  temp_args[2] = (char *) "temp/cuda";
  modify->add_compute(3,temp_args);
  delete [] temp_args;
  tflag = 1;

  // pressure compute
  // the temperature compute's ID is handed over as the 4th argument

  const int press_len = strlen(id) + 7;     // "_press" plus terminator
  id_press = new char[press_len];
  strcpy(id_press,id);
  strcat(id_press,"_press");

  char **press_args = new char*[4];
  press_args[0] = id_press;
  press_args[1] = (char *) "all";
  press_args[2] = (char *) "pressure/cuda";
  press_args[3] = id_temp;
  modify->add_compute(4,press_args);
  delete [] press_args;
  pflag = 1;
}

View File

@ -0,0 +1,36 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(npt/cuda,FixNPTCuda)
#else
#ifndef LMP_FIX_NPTCuda_H
#define LMP_FIX_NPTCuda_H
#include "fix_nh_cuda.h"
namespace LAMMPS_NS {
// Fix registered under the style name "npt/cuda"; everything except the
// constructor comes from FixNHCuda.
class FixNPTCuda : public FixNHCuda {
 public:
  FixNPTCuda(class LAMMPS *, int, char **);

  // nothing extra to release in this class
  ~FixNPTCuda() {}
};
}
#endif
#endif

View File

@ -0,0 +1,155 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <cstdio>
#include <cstring>
#include "fix_nve_cuda.h"
#include "fix_nve_cuda_cu.h"
#include "atom.h"
#include "force.h"
#include "update.h"
#include "respa.h"
#include "error.h"
#include "cuda.h"
#include "cuda_modify_flags.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Constructor: requires an active Cuda object, validates the argument
// count (skipped when the style is "nve/sphere") and marks the fix as a
// time integrator.
FixNVECuda::FixNVECuda(LAMMPS *lmp, int narg, char **arg) :
  Fix(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  const bool is_sphere_style = (strcmp(style,"nve/sphere") == 0);
  if (!is_sphere_style && narg < 3) {
    error->all("Illegal fix nve command");
  }

  time_integrate = 1;
}
/* ---------------------------------------------------------------------- */
// Reports the hooks used by this fix: initial and final integrate (CUDA).
// The rRESPA integrate flags are intentionally left disabled.
int FixNVECuda::setmask()
{
  int hooks = 0;
  hooks = hooks | INITIAL_INTEGRATE_CUDA;
  hooks = hooks | FINAL_INTEGRATE_CUDA;
  // hooks = hooks | INITIAL_INTEGRATE_RESPA_CUDA;
  // hooks = hooks | FINAL_INTEGRATE_RESPA_CUDA;
  return hooks;
}
/* ---------------------------------------------------------------------- */
void FixNVECuda::init()
{
dtv = update->dt;
dtf = 0.5 * update->dt * force->ftm2v;
if (strcmp(update->integrate_style,"respa") == 0)
step_respa = ((Respa *) update->integrate)->step;
triggerneighsq= cuda->shared_data.atom.triggerneighsq;
cuda->neighbor_decide_by_integrator=1;
Cuda_FixNVECuda_Init(&cuda->shared_data,dtv,dtf);
}
/* ----------------------------------------------------------------------
allow for both per-type and per-atom mass
------------------------------------------------------------------------- */
void FixNVECuda::initial_integrate(int vflag)
{
if(triggerneighsq!=cuda->shared_data.atom.triggerneighsq)
{
triggerneighsq= cuda->shared_data.atom.triggerneighsq;
Cuda_FixNVECuda_Init(&cuda->shared_data,dtv,dtf);
}
int nlocal = atom->nlocal;
if(igroup == atom->firstgroup) nlocal = atom->nfirst;
Cuda_FixNVECuda_InitialIntegrate(& cuda->shared_data, groupbit,nlocal);
}
/* ---------------------------------------------------------------------- */
void FixNVECuda::final_integrate()
{
int nlocal = atom->nlocal;
if(igroup == atom->firstgroup) nlocal = atom->nfirst;
Cuda_FixNVECuda_FinalIntegrate(& cuda->shared_data, groupbit,nlocal);
}
/* ---------------------------------------------------------------------- */
// rRESPA variant of the first half step.
// this point should not be reached yet since RESPA is not supported
void FixNVECuda::initial_integrate_respa(int vflag, int ilevel, int flag)
{
  // a non-zero flag is only used by NPT,NPH
  if (flag) return;

  const double level_step = step_respa[ilevel];
  dtv = level_step;
  dtf = 0.5 * level_step * force->ftm2v;

  // innermost level - NVE update of v and x
  // all other levels - NVE update of v
  if (ilevel != 0) {
    final_integrate();
  } else {
    initial_integrate(vflag);
  }
}
/* ---------------------------------------------------------------------- */
void FixNVECuda::final_integrate_respa(int ilevel, int iloop)
{
//this point should not be reached yet since RESPA is not supported
dtf = 0.5 * step_respa[ilevel] * force->ftm2v;
final_integrate();
}
/* ---------------------------------------------------------------------- */
// Recomputes dtv/dtf from the current update->dt and pushes them to the
// CUDA side again.
void FixNVECuda::reset_dt()
{
  dtv = update->dt;
  dtf = 0.5 * update->dt * force->ftm2v;

  Cuda_FixNVECuda_Init(&cuda->shared_data,dtv,dtf);
}

View File

@ -0,0 +1,63 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(nve/cuda,FixNVECuda)
#else
#ifndef LMP_FIX_NVE_CUDA_H
#define LMP_FIX_NVE_CUDA_H
#include "fix.h"
#include "cuda_precision.h"
namespace LAMMPS_NS {
// Fix registered under the style name "nve/cuda".
class FixNVECuda : public Fix {
 public:
  FixNVECuda(class LAMMPS *, int, char **);
  int setmask();
  virtual void init();

  // integration hooks (plain and rRESPA variants)
  virtual void initial_integrate(int);
  virtual void final_integrate();
  void initial_integrate_respa(int, int, int);
  void final_integrate_respa(int, int);

  void reset_dt();

  // cached copy of cuda->shared_data.atom.triggerneighsq
  X_FLOAT triggerneighsq;

 protected:
  class Cuda *cuda;

  double dtv;            // update->dt (or the rRESPA level step)
  double dtf;            // 0.5 * step * force->ftm2v
  double *step_respa;    // only assigned in init() when the integrate style is "respa"

  int mass_require;
};
}
#endif
#endif

View File

@ -0,0 +1,48 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <cstring>
#include "fix_nvt_cuda.h"
#include "group.h"
#include "modify.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Constructor: requires temperature control without pressure control,
// then registers the compute "<fix-ID>_temp <group> temp/cuda" using this
// fix's own group.
FixNVTCuda::FixNVTCuda(LAMMPS *lmp, int narg, char **arg) :
  FixNHCuda(lmp, narg, arg)
{
  if (!tstat_flag) {
    error->all("Temperature control must be used with fix nvt");
  }
  if (pstat_flag) {
    error->all("Pressure control can not be used with fix nvt");
  }

  // compute ID = fix-ID + "_temp"

  const int temp_len = strlen(id) + 6;      // "_temp" plus terminator
  id_temp = new char[temp_len];
  strcpy(id_temp,id);
  strcat(id_temp,"_temp");

  char **temp_args = new char*[3];
  temp_args[0] = id_temp;
  temp_args[1] = group->names[igroup];
  temp_args[2] = (char *) "temp/cuda";
  modify->add_compute(3,temp_args);
  delete [] temp_args;
  tflag = 1;
}

View File

@ -0,0 +1,36 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(nvt/cuda,FixNVTCuda)
#else
#ifndef LMP_FIX_NVTCuda_H
#define LMP_FIX_NVTCuda_H
#include "fix_nh_cuda.h"
namespace LAMMPS_NS {
// Fix class registered as style nvt/cuda by the FixStyle() line above.
// All behaviour beyond construction is inherited from FixNHCuda.
class FixNVTCuda : public FixNHCuda {
public:
// arguments: owning LAMMPS instance, argument count, argument strings
FixNVTCuda(class LAMMPS *, int, char **);
// empty body: this class releases nothing itself
~FixNVTCuda() {}
};
}
#endif
#endif

View File

@ -0,0 +1,181 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include <cstring>
#include <cstdlib>
#include "fix_set_force_cuda.h"
#include "fix_set_force_cuda_cu.h"
#include "atom.h"
#include "update.h"
#include "respa.h"
#include "error.h"
#include "cuda.h"
#include "memory.h"
#include "cuda_modify_flags.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Parse "fix ID group setforce/cuda fx fy fz".
   Each of fx/fy/fz is either a number or the literal string "NULL";
   "NULL" clears the matching flag and leaves the value unset.
------------------------------------------------------------------------- */
FixSetForceCuda::FixSetForceCuda(LAMMPS *lmp, int narg, char **arg) :
  Fix(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  if (narg != 6) error->all("Illegal fix setforce/cuda command");

  // this fix provides a 3-component global vector (see compute_vector)
  vector_flag = 1;
  size_vector = 3;
  global_freq = 1;
  extvector = 1;

  // per-axis flag is 1 when a value was given, 0 for "NULL"
  flagx = (strcmp(arg[3],"NULL") == 0) ? 0 : 1;
  if (flagx) xvalue = atof(arg[3]);

  flagy = (strcmp(arg[4],"NULL") == 0) ? 0 : 1;
  if (flagy) yvalue = atof(arg[4]);

  flagz = (strcmp(arg[5],"NULL") == 0) ? 0 : 1;
  if (flagz) zvalue = atof(arg[5]);

  // no reduced total is cached yet; host-side sums start at zero
  force_flag = 0;
  foriginal[0] = 0.0;
  foriginal[1] = 0.0;
  foriginal[2] = 0.0;

  // the cCudaData wrapper is allocated lazily in init()
  cu_foriginal=NULL;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Return the OR of the four hook flags this fix sets.
------------------------------------------------------------------------- */
int FixSetForceCuda::setmask()
{
  return 0
    | POST_FORCE_CUDA
    | THERMO_ENERGY_CUDA
    | POST_FORCE_RESPA
    | MIN_POST_FORCE_CUDA;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Allocate the cCudaData wrapper around foriginal[3] on first use and,
   when the integrator style is "respa", cache its number of levels.
------------------------------------------------------------------------- */
void FixSetForceCuda::init()
{
  // created once; later init() calls reuse the existing wrapper
  if (cu_foriginal == NULL)
    cu_foriginal = new cCudaData<double, F_FLOAT, x> (foriginal,3);

  const bool using_respa = (strcmp(update->integrate_style,"respa") == 0);
  if (using_respa)
    nlevels_respa = ((Respa *) update->integrate)->nlevels;
}
/* ---------------------------------------------------------------------- */
void FixSetForceCuda::setup(int vflag)
{
MYDBG( printf("# CUDA: FixSetForceCuda::setup\n"); )
if (strcmp(update->integrate_style,"verlet") == 0)
{
Cuda_FixSetForceCuda_Init(&cuda->shared_data);
cuda->cu_f->upload();
post_force(vflag);
cuda->cu_f->download();
}
else {
((Respa *) update->integrate)->copy_flevel_f(nlevels_respa-1);
cuda->cu_f->download();
post_force_respa(vflag,nlevels_respa-1,0);
cuda->cu_f->upload();
((Respa *) update->integrate)->copy_f_flevel(nlevels_respa-1);
}
MYDBG( printf("# CUDA: FixSetForceCuda::setup done\n"); )
}
/* ---------------------------------------------------------------------- */
// Forwards to post_force() with the same vflag.
// NOTE(review): the name suggests this is the minimizer setup hook -- confirm against the Fix base class.
void FixSetForceCuda::min_setup(int vflag)
{
post_force(vflag);
}
/* ---------------------------------------------------------------------- */
void FixSetForceCuda::post_force(int vflag)
{
MYDBG( printf("# CUDA: FixSetForceCuda::postforce start\n"); )
force_flag = 0;
cu_foriginal->memset_device(0);
Cuda_FixSetForceCuda_PostForce(&cuda->shared_data, groupbit, xvalue, yvalue,zvalue,(F_FLOAT*) cu_foriginal->dev_data(),flagx,flagy,flagz);
cu_foriginal->download();
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   rRESPA variant.
   Outermost level: same as post_force().
   Any other level: done on the host -- forces and masks are downloaded,
   the original force of every group atom is summed into foriginal[],
   flagged components are set to 0.0, and forces are uploaded again.
------------------------------------------------------------------------- */
void FixSetForceCuda::post_force_respa(int vflag, int ilevel, int iloop)
{
  if (ilevel == nlevels_respa-1) {
    post_force(vflag);
    return;
  }

  cuda->cu_f->download();
  cuda->cu_mask->download();

  double **host_f = atom->f;
  int *group_mask = atom->mask;
  const int natoms = atom->nlocal;

  foriginal[0] = foriginal[1] = foriginal[2] = 0.0;
  force_flag = 0;

  for (int i = 0; i < natoms; i++) {
    if (!(group_mask[i] & groupbit)) continue;

    double *fi = host_f[i];

    // record the force before it is overwritten
    foriginal[0] += fi[0];
    foriginal[1] += fi[1];
    foriginal[2] += fi[2];

    // inner levels use 0.0, not xvalue/yvalue/zvalue
    if (flagx) fi[0] = 0.0;
    if (flagy) fi[1] = 0.0;
    if (flagz) fi[2] = 0.0;
  }

  cuda->cu_f->upload();
}
/* ---------------------------------------------------------------------- */
// Forwards to post_force() with the same vflag.
// NOTE(review): presumably the minimizer counterpart of post_force, matching MIN_POST_FORCE_CUDA in setmask() -- confirm.
void FixSetForceCuda::min_post_force(int vflag)
{
post_force(vflag);
}
/* ----------------------------------------------------------------------
return components of total force on fix group before force was changed
------------------------------------------------------------------------- */
// n selects the component (0,1,2) of the summed original force.
// The per-processor sums in foriginal[] are reduced over all processors
// into foriginal_all[] the first time this is called after the sums were
// refreshed (force_flag == 0); later calls reuse the cached total.
double FixSetForceCuda::compute_vector(int n)
{
  // only sum across procs one time
  if (force_flag == 0) {
    MPI_Allreduce(foriginal,foriginal_all,3,MPI_DOUBLE,MPI_SUM,world);
    force_flag = 1;
  }

  // foriginal_all holds exactly 3 entries and size_vector is 3, so n is
  // used directly; the former n+1 returned the wrong component and read
  // one element past the end of the array for n == 2
  return foriginal_all[n];
}

View File

@ -0,0 +1,63 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(setforce/cuda,FixSetForceCuda)
#else
#ifndef LMP_FIX_SET_FORCE_CUDA_H
#define LMP_FIX_SET_FORCE_CUDA_H
#include "fix.h"
#include "cuda_data.h"
namespace LAMMPS_NS {
class FixSetForceCuda : public Fix {
public:
FixSetForceCuda(class LAMMPS *, int, char **);
int setmask();
void init();
void setup(int);
void min_setup(int);
void post_force(int);
void post_force_respa(int, int, int);
void min_post_force(int);
double compute_vector(int);
private:
class Cuda *cuda;
int flagx,flagy,flagz;
double xvalue,yvalue,zvalue;
double foriginal[3],foriginal_all[3];
cCudaData<double , F_FLOAT , x>* cu_foriginal;
int force_flag;
int nlevels_respa;
};
}
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,133 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(shake/cuda,FixShakeCuda)
#else
#ifndef LMP_FIX_SHAKE_CUDA_H
#define LMP_FIX_SHAKE_CUDA_H
#include "fix.h"
#include "cuda_data.h"
#include "cuda_precision.h"
namespace LAMMPS_NS {
// Fix class registered as style shake/cuda by the FixStyle() line above.
// Only the declaration is visible here: it holds the SHAKE settings,
// per-atom cluster arrays, cCudaData wrappers for several of those arrays,
// and statistics buffers.
// NOTE(review): the implementation file is not shown in this view, so the
// member notes below restate the existing comments and names only.
class FixShakeCuda : public Fix {
public:
// arguments: owning LAMMPS instance, argument count, argument strings
FixShakeCuda(class LAMMPS *, int, char **);
~FixShakeCuda();
int setmask();
void init();
void setup(int);
void pre_neighbor();
void post_force(int);
// rRESPA variant is declared in comment form only, i.e. not part of this class
//void post_force_respa(int, int, int);
double memory_usage();
// per-atom array management and communication hooks
void grow_arrays(int);
void copy_arrays(int, int);
void set_arrays(int);
int pack_exchange(int, double *);
int unpack_exchange(int, double *);
int pack_comm(int, int *, double *, int, int *);
void unpack_comm(int, int, double *);
int dof(int);
void reset_dt();
// public data member; presumably accumulated time spent in post_force -- TODO confirm
double time_postforce;
private:
class Cuda *cuda;
int me,nprocs;
double PI;
double tolerance; // SHAKE tolerance
int max_iter; // max # of SHAKE iterations
int output_every; // SHAKE stat output every so often
int next_output; // timestep for next output
// settings from input command
int *bond_flag,*angle_flag; // bond/angle types to constrain
int *type_flag; // constrain bonds to these types
double *mass_list; // constrain bonds to these masses
int nmass; // # of masses in mass_list
bool neighbor_step; // was neighboring done in this step -> need to run the Cuda_FixShake_Init
double *bond_distance,*angle_distance; // constraint distances
// cCudaData wrappers named after the two distance arrays above
cCudaData<double , X_FLOAT , xx >* cu_bond_distance;
cCudaData<double , X_FLOAT , xx >* cu_angle_distance;
int ifix_respa; // rRESPA fix needed by SHAKE
int nlevels_respa; // copies of needed rRESPA variables
int *loop_respa;
double *step_respa;
double **x,**v,**f; // local ptrs to atom class quantities
double *mass,*rmass;
int *type;
int nlocal;
// atom-based arrays
int *shake_flag; // 0 if atom not in SHAKE cluster
// 1 = size 3 angle cluster
// 2,3,4 = size of bond-only cluster
int **shake_atom; // global IDs of atoms in cluster
// central atom is 1st
// lowest global ID is 1st for size 2
int **shake_type; // bondtype of each bond in cluster
// for angle cluster, 3rd value
// is angletype
double **xshake; // unconstrained atom coords
// cCudaData wrappers named after the atom-based arrays, the cluster list and the virial
cCudaData<int , int , xx >* cu_shake_flag;
cCudaData<int , int , yx >* cu_shake_atom;
cCudaData<int , int , yx >* cu_shake_type;
cCudaData<double , X_FLOAT , xy >* cu_xshake;
cCudaData<int , int , xx >* cu_list;
cCudaData<double , ENERGY_FLOAT , xx >* cu_virial;
int* countoccur;
int vflag; // virial flag
double dtv,dtfsq; // timesteps for trial move
double dtf_inner,dtf_innerhalf; // timesteps for rRESPA trial move
int *list; // list of clusters to SHAKE
int nlist,maxlist; // size and max-size of list
// stat quantities
int *b_count,*b_count_all; // counts for each bond type
double *b_ave,*b_max,*b_min; // ave/max/min dist for each bond type
double *b_ave_all,*b_max_all,*b_min_all; // MPI summing arrays
int *a_count,*a_count_all; // ditto for angle types
double *a_ave,*a_max,*a_min;
double *a_ave_all,*a_max_all,*a_min_all;
// private helpers (definitions not visible in this view)
void find_clusters();
void swap_clusters(int i,int j);
int masscheck(double);
void unconstrained_update();
void shake2(int);
void shake3(int);
void shake4(int);
void shake3angle(int);
void stats();
int bondfind(int, int, int);
int anglefind(int, int, int);
};
}
#endif
#endif

View File

@ -0,0 +1,220 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <cstring>
#include <cstdlib>
#include <cmath>
#include "fix_temp_berendsen_cuda.h"
#include "fix_temp_berendsen_cuda_cu.h"
#include "atom.h"
#include "force.h"
#include "group.h"
#include "update.h"
#include "comm.h"
#include "modify.h"
#include "compute.h"
#include "error.h"
#include "cuda.h"
#include "cuda_modify_flags.h"
using namespace LAMMPS_NS;
// values stored in FixTempBerendsenCuda::which; init() picks BIAS when
// temperature->tempbias is non-zero and NOBIAS otherwise
enum{NOBIAS,BIAS};
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Parse "fix ID group temp/berendsen/cuda Tstart Tstop Tperiod" and
   register a temp/cuda compute named "<fix-ID>_temp" on the fix group.
------------------------------------------------------------------------- */
FixTempBerendsenCuda::FixTempBerendsenCuda(LAMMPS *lmp, int narg, char **arg) :
  Fix(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  if (narg != 6) error->all("Illegal fix temp/berendsen/cuda command");

  // the thermostat is applied on every timestep
  nevery = 1;

  // arg[3], arg[4], arg[5] = start temperature, stop temperature, period
  t_start = atof(arg[3]);
  t_stop = atof(arg[4]);
  t_period = atof(arg[5]);

  if (t_period <= 0.0) error->all("Fix temp/berendsen/cuda period must be > 0.0");

  // compute ID = fix-ID + "_temp"
  // +6 covers the 5 characters of "_temp" plus the terminating NUL
  const int idlen = strlen(id) + 6;
  id_temp = new char[idlen];
  strcpy(id_temp,id);
  strcat(id_temp,"_temp");

  // arguments for Modify::add_compute: ID, group name, style
  char **compute_args = new char*[3];
  compute_args[0] = id_temp;
  compute_args[1] = group->names[igroup];
  compute_args[2] = (char *) "temp/cuda";
  modify->add_compute(3,compute_args);
  delete [] compute_args;

  // the compute was created here, so the destructor must remove it
  tflag = 1;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Remove the temperature compute while tflag is still set, i.e. while
   it is the one created by the constructor, then free the ID string.
------------------------------------------------------------------------- */
FixTempBerendsenCuda::~FixTempBerendsenCuda()
{
  if (tflag != 0) {
    modify->delete_compute(id_temp);
  }

  delete [] id_temp;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   The only hook flag this fix sets is END_OF_STEP_CUDA.
------------------------------------------------------------------------- */
int FixTempBerendsenCuda::setmask()
{
  return 0 | END_OF_STEP_CUDA;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Look up the temperature compute by ID, warn when it is not flagged
   cudable, and record whether it applies a velocity bias.
------------------------------------------------------------------------- */
void FixTempBerendsenCuda::init()
{
  const int icompute = modify->find_compute(id_temp);
  if (icompute < 0)
    error->all("Temperature ID for fix temp/berendsen/cuda does not exist");
  temperature = modify->compute[icompute];

  if (!temperature->cudable)
    error->warning("Fix temp/berendsen/cuda uses non cudable temperature compute");

  which = temperature->tempbias ? BIAS : NOBIAS;

  // temperature->init() is intentionally not called here; the original
  // author left it commented out with the remark
  // "not in original berendsen possible error?"
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Scale velocities of the fix group by
     lamda = sqrt(1 + dt/t_period * (t_target/t_current - 1)).
   Three paths:
     no bias           -> CUDA routine
     bias, cudable     -> remove_bias_all / CUDA routine / restore_bias_all
     bias, not cudable -> host loop with per-atom remove/restore bias
------------------------------------------------------------------------- */
void FixTempBerendsenCuda::end_of_step()
{
  // a compute that is not cudable reads host arrays, so refresh them first
  if (!temperature->cudable) {
    cuda->cu_x->download();
    cuda->cu_v->download();
  }

  double t_current = temperature->compute_scalar();
  if (t_current == 0.0)
    error->all("Computed temperature for fix temp/berendsen/cuda cannot be 0.0");

  // target temperature interpolated between t_start and t_stop over the run
  double delta = update->ntimestep - update->beginstep;
  delta /= update->endstep - update->beginstep;
  t_target = t_start + delta * (t_stop-t_start);

  // rescale velocities by lamda
  double lamda = sqrt(1.0 + update->dt/t_period*(t_target/t_current - 1.0));

  if (which == NOBIAS) {
    Cuda_FixTempBerendsenCuda_EndOfStep(&cuda->shared_data, groupbit,lamda);
    return;
  }

  if (temperature->cudable) {
    temperature->remove_bias_all();
    Cuda_FixTempBerendsenCuda_EndOfStep(&cuda->shared_data, groupbit,lamda);
    temperature->restore_bias_all();
    return;
  }

  // biased compute without CUDA support: scale on the host, then upload v
  double **vel = atom->v;
  int *group_mask = atom->mask;
  const int natoms = atom->nlocal;

  cuda->cu_x->download();
  cuda->cu_v->download();

  for (int i = 0; i < natoms; i++) {
    if (!(group_mask[i] & groupbit)) continue;

    temperature->remove_bias(i,vel[i]);
    vel[i][0] *= lamda;
    vel[i][1] *= lamda;
    vel[i][2] *= lamda;
    temperature->restore_bias(i,vel[i]);
  }

  cuda->cu_v->upload();
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Handle "fix_modify ... temp ID": switch to the named temperature
   compute. Returns 2 when the keyword was "temp", 0 for anything else.
------------------------------------------------------------------------- */
int FixTempBerendsenCuda::modify_param(int narg, char **arg)
{
  if (strcmp(arg[0],"temp") != 0) return 0;

  if (narg < 2) error->all("Illegal fix_modify command");

  // drop the compute created by the constructor, if it is still in use
  if (tflag) {
    modify->delete_compute(id_temp);
    tflag = 0;
  }

  // store a copy of the new compute ID
  delete [] id_temp;
  const int len = strlen(arg[1]) + 1;
  id_temp = new char[len];
  strcpy(id_temp,arg[1]);

  const int icompute = modify->find_compute(id_temp);
  if (icompute < 0) error->all("Could not find fix_modify temperature ID");
  temperature = modify->compute[icompute];

  if (temperature->tempflag == 0)
    error->all("Fix_modify temperature ID does not compute temperature");

  // group mismatch is only a warning, issued by processor 0
  if (temperature->igroup != igroup && comm->me == 0)
    error->warning("Group for fix_modify temp != fix group");

  return 2;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Set both ends of the temperature ramp to t_new.
------------------------------------------------------------------------- */
void FixTempBerendsenCuda::reset_target(double t_new)
{
  t_stop = t_new;
  t_start = t_stop;
}

View File

@ -0,0 +1,58 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(temp/berendsen/cuda,FixTempBerendsenCuda)
#else
#ifndef LMP_FIX_TEMP_BERENDSEN_CUDA_H
#define LMP_FIX_TEMP_BERENDSEN_CUDA_H
#include "fix.h"
namespace LAMMPS_NS {
// Fix class registered as style temp/berendsen/cuda by the FixStyle() line above.
class FixTempBerendsenCuda : public Fix {
public:
// arguments: owning LAMMPS instance, argument count, argument strings
FixTempBerendsenCuda(class LAMMPS *, int, char **);
~FixTempBerendsenCuda();
int setmask();
void init();
void end_of_step();
int modify_param(int, char **);
void reset_target(double);
private:
// taken from lmp->cuda in the constructor
class Cuda *cuda;
// NOBIAS or BIAS, chosen in init() from temperature->tempbias
int which;
// ramp end points, current target and period parsed from the fix arguments
double t_start,t_stop,t_target,t_period;
// ID of the temperature compute (heap-allocated string owned by this fix)
char *id_temp;
// compute looked up by id_temp in init() / modify_param()
class Compute *temperature;
// 1 while the compute is the one created by the constructor
int tflag;
};
}
#endif
#endif

View File

@ -0,0 +1,222 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include <cstring>
#include <cstdlib>
#include <cmath>
#include "fix_temp_rescale_cuda.h"
#include "fix_temp_rescale_cuda_cu.h"
#include "atom.h"
#include "force.h"
#include "group.h"
#include "update.h"
#include "domain.h"
#include "region.h"
#include "comm.h"
#include "modify.h"
#include "compute.h"
#include "error.h"
#include "cuda.h"
#include "cuda_modify_flags.h"
using namespace LAMMPS_NS;
// values stored in FixTempRescaleCuda::which; init() picks BIAS when
// temperature->tempbias is non-zero and NOBIAS otherwise
enum{NOBIAS,BIAS};
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Parse "fix ID group temp/rescale/cuda N Tstart Tstop window fraction"
   and register a temp/cuda compute named "<fix-ID>_temp" on the fix group.
------------------------------------------------------------------------- */
FixTempRescaleCuda::FixTempRescaleCuda(LAMMPS *lmp, int narg, char **arg) :
  Fix(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  if (narg < 8) error->all("Illegal fix temp/rescale/cuda command");

  // arg[3] = rescale interval in timesteps, must be positive
  nevery = atoi(arg[3]);
  if (nevery <= 0) error->all("Illegal fix temp/rescale/cuda command");

  // this fix provides a global scalar (see compute_scalar)
  scalar_flag = 1;
  global_freq = nevery;
  extscalar = 1;

  // arg[4..7] = start temperature, stop temperature, window, fraction
  t_start = atof(arg[4]);
  t_stop = atof(arg[5]);
  t_window = atof(arg[6]);
  fraction = atof(arg[7]);

  // compute ID = fix-ID + "_temp"
  // +6 covers the 5 characters of "_temp" plus the terminating NUL
  const int idlen = strlen(id) + 6;
  id_temp = new char[idlen];
  strcpy(id_temp,id);
  strcat(id_temp,"_temp");

  // arguments for Modify::add_compute: ID, group name, style
  // (six slots are allocated, three are used)
  char **compute_args = new char*[6];
  compute_args[0] = id_temp;
  compute_args[1] = group->names[igroup];
  compute_args[2] = (char *) "temp/cuda";
  modify->add_compute(3,compute_args);
  delete [] compute_args;

  // the compute was created here, so the destructor must remove it
  tflag = 1;

  // running energy tally starts at zero
  energy = 0.0;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Remove the temperature compute while tflag is still set, i.e. while
   it is the one created by the constructor, then free the ID string.
------------------------------------------------------------------------- */
FixTempRescaleCuda::~FixTempRescaleCuda()
{
  if (tflag != 0) {
    modify->delete_compute(id_temp);
  }

  delete [] id_temp;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Return the OR of the two hook flags this fix sets.
------------------------------------------------------------------------- */
int FixTempRescaleCuda::setmask()
{
  return 0 | END_OF_STEP_CUDA | THERMO_ENERGY_CUDA;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Look up the temperature compute by ID, warn when it is not flagged
   cudable, and record whether it applies a velocity bias.
------------------------------------------------------------------------- */
void FixTempRescaleCuda::init()
{
  const int icompute = modify->find_compute(id_temp);
  if (icompute < 0)
    error->all("Temperature ID for fix temp/rescale/cuda does not exist");
  temperature = modify->compute[icompute];

  if (!temperature->cudable)
    error->warning("Fix temp/rescale/cuda uses non cudable temperature compute");

  which = temperature->tempbias ? BIAS : NOBIAS;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   When the current temperature is further than t_window from the ramp
   target, move it by "fraction" of the difference: velocities of the
   fix group are scaled by sqrt(t_target/t_current) and the change is
   added to the energy tally.
   Three paths:
     no bias           -> CUDA routine
     bias, cudable     -> remove_bias_all / CUDA routine / restore_bias_all
     bias, not cudable -> host loop with per-atom remove/restore bias
------------------------------------------------------------------------- */
void FixTempRescaleCuda::end_of_step()
{
  // a compute that is not cudable reads host arrays, so refresh them first
  if (!temperature->cudable) {
    cuda->cu_x->download();
    cuda->cu_v->download();
  }

  double t_current = temperature->compute_scalar();
  if (t_current == 0.0)
    error->all("Computed temperature for fix temp/rescale/cuda cannot be 0.0");

  // target temperature interpolated between t_start and t_stop over the run
  double delta = update->ntimestep - update->beginstep;
  delta /= update->endstep - update->beginstep;
  double t_target = t_start + delta * (t_stop-t_start);

  // rescale velocity of appropriate atoms if outside window
  if (!(fabs(t_current-t_target) > t_window)) return;

  t_target = t_current - fraction*(t_current-t_target);
  double factor = sqrt(t_target/t_current);

  // local conversion factor; the class member of the same name is not touched
  double energy_per_temp = 0.5 * force->boltz * temperature->dof;

  double **vel = atom->v;
  int *group_mask = atom->mask;
  int natoms = atom->nlocal;

  switch (which) {
  case NOBIAS:
    energy += (t_current-t_target) * energy_per_temp;
    Cuda_FixTempRescaleCuda_EndOfStep(&cuda->shared_data, groupbit,factor);
    break;

  case BIAS:
    energy += (t_current-t_target) * energy_per_temp;

    if (temperature->cudable) {
      temperature->remove_bias_all();
      Cuda_FixTempRescaleCuda_EndOfStep(&cuda->shared_data, groupbit,factor);
      temperature->restore_bias_all();
    } else {
      // scale on the host, then upload v
      cuda->cu_x->download();
      cuda->cu_v->download();

      for (int i = 0; i < natoms; i++) {
        if (!(group_mask[i] & groupbit)) continue;

        temperature->remove_bias(i,vel[i]);
        vel[i][0] *= factor;
        vel[i][1] *= factor;
        vel[i][2] *= factor;
        temperature->restore_bias(i,vel[i]);
      }

      cuda->cu_v->upload();
    }
    break;
  }
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Handle "fix_modify ... temp ID": switch to the named temperature
   compute. Returns 2 when the keyword was "temp", 0 for anything else.
------------------------------------------------------------------------- */
int FixTempRescaleCuda::modify_param(int narg, char **arg)
{
  if (strcmp(arg[0],"temp") != 0) return 0;

  if (narg < 2) error->all("Illegal fix_modify command");

  // drop the compute created by the constructor, if it is still in use
  if (tflag) {
    modify->delete_compute(id_temp);
    tflag = 0;
  }

  // store a copy of the new compute ID
  delete [] id_temp;
  const int len = strlen(arg[1]) + 1;
  id_temp = new char[len];
  strcpy(id_temp,arg[1]);

  const int icompute = modify->find_compute(id_temp);
  if (icompute < 0) error->all("Could not find fix_modify temperature ID");
  temperature = modify->compute[icompute];

  if (temperature->tempflag == 0)
    error->all("Fix_modify temperature ID does not compute temperature");

  // group mismatch is only a warning, issued by processor 0
  if (temperature->igroup != igroup && comm->me == 0)
    error->warning("Group for fix_modify temp != fix group");

  if (!temperature->cudable)
    error->warning("Fix temp/rescale/cuda uses non cudable temperature compute");

  return 2;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Set both ends of the temperature ramp to t_new.
------------------------------------------------------------------------- */
void FixTempRescaleCuda::reset_target(double t_new)
{
  t_stop = t_new;
  t_start = t_stop;
}
/* ---------------------------------------------------------------------- */
// Returns the running energy tally: set to 0.0 in the constructor and
// incremented by (t_current - t_target) * efactor whenever end_of_step() rescales.
double FixTempRescaleCuda::compute_scalar()
{
return energy;
}

View File

@ -0,0 +1,61 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(temp/rescale/cuda,FixTempRescaleCuda)
#else
#ifndef FIX_TEMP_RESCALE_CUDA_H
#define FIX_TEMP_RESCALE_CUDA_H
#include "fix.h"
namespace LAMMPS_NS {
// Fix class registered as style temp/rescale/cuda by the FixStyle() line above.
class FixTempRescaleCuda : public Fix {
public:
// arguments: owning LAMMPS instance, argument count, argument strings
FixTempRescaleCuda(class LAMMPS *, int, char **);
~FixTempRescaleCuda();
int setmask();
void init();
void end_of_step();
int modify_param(int, char **);
void reset_target(double);
double compute_scalar();
private:
// taken from lmp->cuda in the constructor
class Cuda *cuda;
// NOBIAS or BIAS, chosen in init() from temperature->tempbias
int which;
// ramp end points and tolerance window parsed from the fix arguments
double t_start,t_stop,t_window;
// fraction: parsed from the fix arguments; energy: tally returned by compute_scalar()
// NOTE(review): end_of_step() declares a local variable named efactor, so this member looks unused -- confirm
double fraction,energy,efactor;
// ID of the temperature compute (heap-allocated string owned by this fix)
char *id_temp;
// compute looked up by id_temp in init() / modify_param()
class Compute *temperature;
// 1 while the compute is the one created by the constructor
int tflag;
};
}
#endif
#endif

View File

@ -0,0 +1,237 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include <cstring>
#include <cstdlib>
#include <cmath>
#include "fix_temp_rescale_limit_cuda.h"
#include "fix_temp_rescale_limit_cuda_cu.h"
#include "atom.h"
#include "force.h"
#include "group.h"
#include "update.h"
#include "domain.h"
#include "region.h"
#include "comm.h"
#include "modify.h"
#include "compute.h"
#include "error.h"
#include "cuda.h"
#include "cuda_modify_flags.h"
using namespace LAMMPS_NS;
// Smaller / larger of two values.
// The whole expansion is parenthesized so the macros can be used inside
// larger expressions (e.g. "2*MIN(a,b)" or "x - MAX(a,b)") without the
// surrounding operators binding to the condition of the ?: expression.
// Each argument may be evaluated twice: do not pass expressions with side effects.
#define MIN(A,B) (((A) < (B)) ? (A) : (B))
#define MAX(A,B) (((A) > (B)) ? (A) : (B))
// values stored in FixTempRescaleLimitCuda::which; init() picks BIAS when
// temperature->tempbias is non-zero and NOBIAS otherwise
enum{NOBIAS,BIAS};
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Parse "fix ID group temp/rescale/limit/cuda N Tstart Tstop window
   fraction limit" and register a temp/cuda compute named
   "<fix-ID>_temp" on the fix group.
------------------------------------------------------------------------- */
FixTempRescaleLimitCuda::FixTempRescaleLimitCuda(LAMMPS *lmp, int narg, char **arg) :
  Fix(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  if (narg < 9) error->all("Illegal fix temp/rescale/limit/cuda command");

  // arg[3] = rescale interval in timesteps, must be positive
  nevery = atoi(arg[3]);
  if (nevery <= 0) error->all("Illegal fix temp/rescale/limit/cuda command");

  // this fix provides a global scalar (see compute_scalar)
  scalar_flag = 1;
  global_freq = nevery;
  extscalar = 1;

  // arg[4..8] = start temperature, stop temperature, window, fraction, limit
  t_start = atof(arg[4]);
  t_stop = atof(arg[5]);
  t_window = atof(arg[6]);
  fraction = atof(arg[7]);
  limit = atof(arg[8]);
  if (limit <= 1.0) error->all("Illegal fix temp/rescale/limit/cuda command (limit must be > 1.0)");

  // compute ID = fix-ID + "_temp"
  // +6 covers the 5 characters of "_temp" plus the terminating NUL
  const int idlen = strlen(id) + 6;
  id_temp = new char[idlen];
  strcpy(id_temp,id);
  strcat(id_temp,"_temp");

  // arguments for Modify::add_compute: ID, group name, style
  // (six slots are allocated, three are used)
  char **compute_args = new char*[6];
  compute_args[0] = id_temp;
  compute_args[1] = group->names[igroup];
  compute_args[2] = (char *) "temp/cuda";
  modify->add_compute(3,compute_args);
  delete [] compute_args;

  // the compute was created here, so the destructor must remove it
  tflag = 1;

  // running energy tally starts at zero
  energy = 0.0;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Remove the temperature compute while tflag is still set, i.e. while
   it is the one created by the constructor, then free the ID string.
------------------------------------------------------------------------- */
FixTempRescaleLimitCuda::~FixTempRescaleLimitCuda()
{
  if (tflag != 0) {
    modify->delete_compute(id_temp);
  }

  delete [] id_temp;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Return the OR of the two hook flags this fix sets.
------------------------------------------------------------------------- */
int FixTempRescaleLimitCuda::setmask()
{
  return 0 | END_OF_STEP_CUDA | THERMO_ENERGY_CUDA;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Look up the temperature compute by ID, warn when it is not flagged
   cudable, and record whether it applies a velocity bias.
------------------------------------------------------------------------- */
void FixTempRescaleLimitCuda::init()
{
  const int icompute = modify->find_compute(id_temp);
  if (icompute < 0)
    error->all("Temperature ID for fix temp/rescale/limit/cuda does not exist");
  temperature = modify->compute[icompute];

  if (!temperature->cudable)
    error->warning("Fix temp/rescale/limit/cuda uses non cudable temperature compute");

  which = temperature->tempbias ? BIAS : NOBIAS;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Same rescaling rule as fix temp/rescale/cuda, with an additional cap:
   after scaling, each velocity component is clamped to
   [-current_limit, +current_limit], where
     current_limit = sqrt(limit * boltz * t_target * dof / massone / mvv2e).
   massone is a single reference mass: rmass[0] when per-atom masses
   exist, otherwise the per-type mass of local atom 0.
   Three paths:
     no bias           -> CUDA routine
     bias, cudable     -> remove_bias_all / CUDA routine / restore_bias_all
     bias, not cudable -> host loop with per-atom remove/restore bias
------------------------------------------------------------------------- */
void FixTempRescaleLimitCuda::end_of_step()
{
  double t_current;

  // a compute that is not cudable reads host arrays, so refresh them first
  if(not temperature->cudable) {cuda->cu_x->download();cuda->cu_v->download();}
  t_current = temperature->compute_scalar();
  if (t_current == 0.0)
    error->all("Computed temperature for fix temp/rescale/limit/cuda cannot be 0.0");

  // target temperature interpolated between t_start and t_stop over the run
  double delta = update->ntimestep - update->beginstep;
  delta /= update->endstep - update->beginstep;
  double t_target = t_start + delta * (t_stop-t_start);

  // rescale velocity of appropriate atoms if outside window

  if (fabs(t_current-t_target) > t_window) {
    t_target = t_current - fraction*(t_current-t_target);
    double factor = sqrt(t_target/t_current);
    double efactor = 0.5 * force->boltz * temperature->dof;

    double **v = atom->v;
    int *mask = atom->mask;
    int nlocal = atom->nlocal;

    // reference mass for the velocity cap
    // the per-type mass array is indexed by atom type, and types start
    // at 1, so slot 0 is never assigned; use the type of local atom 0
    // (falling back to type 1 when this processor owns no atoms)
    // NOTE(review): one mass is used for all atoms, as before -- confirm
    // that this is the intended behaviour for multi-species systems
    double massone;
    if(atom->rmass) massone = atom->rmass[0];
    else massone = atom->mass[nlocal > 0 ? atom->type[0] : 1];

    double current_limit=sqrt(limit*force->boltz*t_target*temperature->dof/massone/force->mvv2e);

    if (which == NOBIAS) {
      energy += (t_current-t_target) * efactor;
      Cuda_FixTempRescaleLimitCuda_EndOfStep(&cuda->shared_data, groupbit,factor,current_limit);
    } else if (which == BIAS) {
      energy += (t_current-t_target) * efactor;
      if(not temperature->cudable)
      {
        // scale and clamp on the host, then upload v
        cuda->cu_x->download();cuda->cu_v->download();
        for (int i = 0; i < nlocal; i++) {
          if (mask[i] & groupbit) {
            temperature->remove_bias(i,v[i]);
            double vx = v[i][0] * factor;
            double vy = v[i][1] * factor;
            double vz = v[i][2] * factor;
            // positive components are capped from above, others from below
            v[i][0]=vx>0?MIN(vx,current_limit):MAX(vx,-current_limit);
            v[i][1]=vy>0?MIN(vy,current_limit):MAX(vy,-current_limit);
            v[i][2]=vz>0?MIN(vz,current_limit):MAX(vz,-current_limit);
            temperature->restore_bias(i,v[i]);
          }
        }
        cuda->cu_v->upload();
      }
      else
      {
        temperature->remove_bias_all();
        Cuda_FixTempRescaleLimitCuda_EndOfStep(&cuda->shared_data, groupbit,factor,current_limit);
        temperature->restore_bias_all();
      }
    }
  }
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Handle "fix_modify ... temp ID": switch to the named temperature
   compute. Returns 2 when the keyword was "temp", 0 for anything else.
------------------------------------------------------------------------- */
int FixTempRescaleLimitCuda::modify_param(int narg, char **arg)
{
  if (strcmp(arg[0],"temp") != 0) return 0;

  if (narg < 2) error->all("Illegal fix_modify command");

  // drop the compute created by the constructor, if it is still in use
  if (tflag) {
    modify->delete_compute(id_temp);
    tflag = 0;
  }

  // store a copy of the new compute ID
  delete [] id_temp;
  const int len = strlen(arg[1]) + 1;
  id_temp = new char[len];
  strcpy(id_temp,arg[1]);

  const int icompute = modify->find_compute(id_temp);
  if (icompute < 0) error->all("Could not find fix_modify temperature ID");
  temperature = modify->compute[icompute];

  if (temperature->tempflag == 0)
    error->all("Fix_modify temperature ID does not compute temperature");

  // group mismatch is only a warning, issued by processor 0
  if (temperature->igroup != igroup && comm->me == 0)
    error->warning("Group for fix_modify temp != fix group");

  if (!temperature->cudable)
    error->warning("Fix temp/rescale/limit/cuda uses non cudable temperature compute");

  return 2;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Set both ends of the temperature ramp to t_new.
------------------------------------------------------------------------- */
void FixTempRescaleLimitCuda::reset_target(double t_new)
{
  t_stop = t_new;
  t_start = t_stop;
}
/* ---------------------------------------------------------------------- */
// Returns the running energy tally: set to 0.0 in the constructor and
// incremented by (t_current - t_target) * efactor whenever end_of_step() rescales.
double FixTempRescaleLimitCuda::compute_scalar()
{
return energy;
}

View File

@ -0,0 +1,61 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(temp/rescale/limit/cuda,FixTempRescaleLimitCuda)
#else
#ifndef FIX_TEMP_RESCALE_LIMIT_CUDA_H
#define FIX_TEMP_RESCALE_LIMIT_CUDA_H
#include "fix.h"
// Declaration of the fix registered as style "temp/rescale/limit/cuda".
namespace LAMMPS_NS {
class FixTempRescaleLimitCuda : public Fix {
public:
FixTempRescaleLimitCuda(class LAMMPS *, int, char **);
~FixTempRescaleLimitCuda();
int setmask();
void init();
void end_of_step();
// handles the "temp" keyword of fix_modify; returns number of args consumed
int modify_param(int, char **);
// sets t_start and t_stop to the given value
void reset_target(double);
// returns the accumulated energy
double compute_scalar();
private:
class Cuda *cuda;
// selects the code path in end_of_step (compared against BIAS there)
int which;
double t_start,t_stop,t_window;
// energy is accumulated in end_of_step using efactor and returned by compute_scalar
double fraction,energy,efactor;
// NOTE(review): presumably the velocity-component cap behind current_limit - confirm
double limit;
// ID string of the temperature compute in use (owned, delete[]'d on change)
char *id_temp;
class Compute *temperature;
// when nonzero, modify_param deletes compute id_temp before replacing it
int tflag;
};
}
#endif
#endif

View File

@ -0,0 +1,103 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdlib>
#include <cstring>
#include "fix_viscous_cuda.h"
#include "fix_viscous_cuda_cu.h"
#include "atom.h"
#include "update.h"
#include "respa.h"
#include "error.h"
#include "cuda_modify_flags.h"
#include "cuda.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Construct on top of FixViscous; requires an active Cuda instance on lmp.
FixViscousCuda::FixViscousCuda(LAMMPS *lmp, int narg, char **arg) :
  FixViscous(lmp, narg, arg)
{
  // device mirror of gamma is created later, not here
  cu_gamma = NULL;

  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }
}
/* ---------------------------------------------------------------------- */
// Release the cCudaData wrapper for gamma, if one was ever allocated.
FixViscousCuda::~FixViscousCuda()
{
  if (cu_gamma != NULL) delete cu_gamma;
}
/* ---------------------------------------------------------------------- */
// Only the CUDA post-force hook is requested; the POST_FORCE_RESPA and
// MIN_POST_FORCE hooks are intentionally not set in this variant.
int FixViscousCuda::setmask()
{
  int hooks = POST_FORCE_CUDA;
  return hooks;
}
/* ---------------------------------------------------------------------- */
// Prepare the CUDA side of the fix and apply the force once.
// vflag is forwarded to post_force().
void FixViscousCuda::setup(int vflag)
{
  // create the cCudaData wrapper around gamma (ntypes+1 entries) on first use
  if (cu_gamma == NULL) {
    cu_gamma = new cCudaData<double, F_FLOAT, x> (gamma,atom->ntypes+1);
  }

  Cuda_FixViscousCuda_Init(&cuda->shared_data);
  cu_gamma->upload();

  // no integrate-style distinction here: the rRESPA branch is disabled,
  // post_force() is always called directly
  post_force(vflag);
}
/* ---------------------------------------------------------------------- */
void FixViscousCuda::min_setup(int vflag)
{
Cuda_FixViscousCuda_Init(&cuda->shared_data);
post_force(vflag);
}
/* ---------------------------------------------------------------------- */
// Apply the viscous drag to atoms in the fix group via the CUDA wrapper.
// vflag is unused here.
void FixViscousCuda::post_force(int vflag)
{
  // drag opposes the velocity vector; its magnitude depends on atom type,
  // which is why the gamma device data is handed to the wrapper
  Cuda_FixViscousCuda_PostForce(&cuda->shared_data,
                                groupbit,
                                cu_gamma->dev_data());
}

View File

@ -0,0 +1,55 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(viscous/cuda,FixViscousCuda)
#else
#ifndef LMP_FIX_VISCOUS_CUDA_H
#define LMP_FIX_VISCOUS_CUDA_H
#include "fix_viscous.h"
#include "cuda_data.h"
// Declaration of the fix registered as style "viscous/cuda"; it extends
// FixViscous and routes the force application through CUDA wrapper calls.
namespace LAMMPS_NS {
class FixViscousCuda : public FixViscous {
public:
FixViscousCuda(class LAMMPS *, int, char **);
~FixViscousCuda();
int setmask();
// creates cu_gamma on first use, uploads it, then calls post_force
void setup(int);
void min_setup(int);
void post_force(int);
// cCudaData wrapper around the inherited gamma array; NULL until allocated,
// deleted in the destructor
cCudaData<double, F_FLOAT, x>* cu_gamma;
private:
// taken from lmp->cuda in the constructor
class Cuda *cuda;
};
}
#endif
#endif

View File

@ -63,6 +63,8 @@ using namespace LAMMPS_NS;
ModifyCuda::ModifyCuda(LAMMPS *lmp) : Modify(lmp)
{
cuda = lmp->cuda;
if(cuda == NULL)
error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
n_initial_integrate_cuda = 0;
n_post_integrate_cuda = 0;

View File

@ -21,7 +21,6 @@
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef CUDA
#include "neighbor_cuda.h"
#include "neigh_list.h"
#include "atom.h"
@ -313,5 +312,4 @@ return;
MYDBG(printf(" # CUDA::NeighFullNSQCuda ... end\n");)
*/
}
#endif

View File

@ -36,6 +36,8 @@ enum{NSQ,BIN,MULTI}; // also in neigh_list.cpp
// Construct on top of Neighbor; requires an active Cuda instance on lmp.
NeighborCuda::NeighborCuda(LAMMPS *lmp) : Neighbor(lmp)
{
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }
}
/* ---------------------------------------------------------------------- */

View File

@ -0,0 +1,186 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
Contributing author: Paul Crozier (SNL)
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_born_coul_long_cuda.h"
#include "pair_born_coul_long_cuda_cu.h"
#include "cuda_data.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "kspace.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "cuda_neigh_list.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
/* ---------------------------------------------------------------------- */
// Construct on top of PairBornCoulLong; requires an active Cuda instance.
PairBornCoulLongCuda::PairBornCoulLongCuda(LAMMPS *lmp) : PairBornCoulLong(lmp)
{
  // coefficient pointers have not been handed to the CUDA shared data yet
  allocated2 = false;

  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  cuda->shared_data.pair.use_block_per_atom = 0;
  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
/* ----------------------------------------------------------------------
remember pointer to arrays in cuda shared data
------------------------------------------------------------------------- */
// Allocate the base-class arrays if needed, then (once) store pointers to
// them in the CUDA shared pair data.
void PairBornCoulLongCuda::allocate()
{
  if (!allocated) PairBornCoulLong::allocate();

  // pointers are registered only on the first call
  if (allocated2) return;
  allocated2 = true;

  cuda->shared_data.pair.special_coul = force->special_coul;
  cuda->shared_data.pair.special_lj = force->special_lj;
  cuda->shared_data.pair.offset = offset;
  cuda->shared_data.pair.cut = cut_lj;
  cuda->shared_data.pair.coeff1 = rhoinv;
  cuda->shared_data.pair.coeff2 = sigma;
  cuda->shared_data.pair.coeff3 = a;
  cuda->shared_data.pair.coeff4 = c;
  cuda->shared_data.pair.coeff5 = d;
}
/* ---------------------------------------------------------------------- */
// Run the born/coul/long pair computation through the CUDA wrapper.
// eflag / vflag select energy and virial tallying.
void PairBornCoulLongCuda::compute(int eflag, int vflag)
{
  MYDBG( printf("PairBornCoulLongCuda compute start\n"); fflush(stdout);)

  if (eflag || vflag) ev_setup(eflag,vflag);

  // upload energy / virial accumulators that were requested
  if (eflag) {
    cuda->cu_eng_vdwl->upload();
    cuda->cu_eng_coul->upload();
  }
  if (vflag) cuda->cu_virial->upload();

#ifdef CUDA_USE_BINNING
  Cuda_PairBornCoulLongCuda(& cuda->shared_data, eflag, vflag);
#else
  Cuda_PairBornCoulLongCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
#endif

  // download results right away unless collection is deferred
  if (!cuda->shared_data.pair.collect_forces_later) {
    if (eflag) {
      cuda->cu_eng_vdwl->download();
      cuda->cu_eng_coul->download();
    }
    if (vflag) cuda->cu_virial->download();
  }

  MYDBG( printf("PairBornCoulLongCuda compute end\n"); fflush(stdout);)
}
/* ---------------------------------------------------------------------- */
void PairBornCoulLongCuda::settings(int narg, char **arg)
{
PairBornCoulLong::settings(narg, arg);
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
}
/* ---------------------------------------------------------------------- */
void PairBornCoulLongCuda::coeff(int narg, char **arg)
{
PairBornCoulLong::coeff(narg, arg);
allocate();
}
void PairBornCoulLongCuda::init_style()
{
if (!atom->q_flag)
error->all("Pair style born/coul/long requires atom attribute q");
// request regular or rRESPA neighbor lists
int irequest;
if (strcmp(update->integrate_style,"respa") == 0) error->all("Integrate Style Respa is not supported by pair style buck/coul/long/cuda");
irequest = neighbor->request(this);
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->cudable = 1;
cut_coulsq = cut_coul * cut_coul;
cuda->shared_data.pair.cut_coulsq_global=cut_coulsq;
if (force->kspace == NULL)
error->all("Pair style is incompatible with KSpace style");
g_ewald = force->kspace->g_ewald;
cuda->shared_data.pair.g_ewald=g_ewald;
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
if(ncoultablebits) error->warning("# CUDA: You asked for the useage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n");
}
// Forward the neighbor list to the base class and, without binning,
// register list id 0 with the Cuda object.
void PairBornCoulLongCuda::init_list(int id, NeighList *ptr)
{
  MYDBG(printf("# CUDA PairBornCoulLongCuda::init_list\n");)
  PairBornCoulLong::init_list(id, ptr);
#ifndef CUDA_USE_BINNING
  // right now only verlet (id 0) is handled, not respa;
  // see Neighbor::init() for details on lammps lists' logic
  if (id == 0) {
    cuda_neigh_list = cuda->registerNeighborList(ptr);
  }
#endif
  MYDBG(printf("# CUDA PairBornCoulLongCuda::init_list end\n");)
}
void PairBornCoulLongCuda::ev_setup(int eflag, int vflag)
{
int maxeatomold=maxeatom;
PairBornCoulLong::ev_setup(eflag,vflag);
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
}

View File

@ -0,0 +1,57 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(born/coul/long/cuda,PairBornCoulLongCuda)
#else
#ifndef LMP_PAIR_BORN_COUL_LONG_CUDA_H
#define LMP_PAIR_BORN_COUL_LONG_CUDA_H
#include "pair_born_coul_long.h"
// Declaration of the pair style registered as "born/coul/long/cuda"; it
// extends PairBornCoulLong and routes compute() through a CUDA wrapper call.
namespace LAMMPS_NS {
class PairBornCoulLongCuda : public PairBornCoulLong
{
public:
PairBornCoulLongCuda(class LAMMPS *);
void compute(int, int);
void settings(int, char **);
void coeff(int, char **);
void init_list(int, class NeighList *);
void init_style();
void ev_setup(int eflag, int vflag);
protected:
// taken from lmp->cuda in the constructor
class Cuda *cuda;
void allocate();
// true once allocate() has stored the coefficient array pointers in
// cuda->shared_data.pair
bool allocated2;
// assigned in init_list() for list id 0 when CUDA_USE_BINNING is not defined;
// not initialized by the constructor
class CudaNeighList* cuda_neigh_list;
};
}
#endif
#endif

View File

@ -0,0 +1,173 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
Contributing author: Paul Crozier (SNL)
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_buck_coul_cut_cuda.h"
#include "pair_buck_coul_cut_cuda_cu.h"
#include "cuda_data.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "kspace.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "cuda_neigh_list.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* ---------------------------------------------------------------------- */
// Construct on top of PairBuckCoulCut; requires an active Cuda instance.
PairBuckCoulCutCuda::PairBuckCoulCutCuda(LAMMPS *lmp) : PairBuckCoulCut(lmp)
{
  // coefficient pointers have not been handed to the CUDA shared data yet
  allocated2 = false;

  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  cuda->shared_data.pair.use_block_per_atom = 0;
  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
/* ----------------------------------------------------------------------
remember pointer to arrays in cuda shared data
------------------------------------------------------------------------- */
// Allocate the base-class arrays if needed, then (once) store pointers to
// them in the CUDA shared pair data.
void PairBuckCoulCutCuda::allocate()
{
  if (!allocated) PairBuckCoulCut::allocate();

  // pointers are registered only on the first call
  if (allocated2) return;
  allocated2 = true;

  cuda->shared_data.pair.special_coul = force->special_coul;
  cuda->shared_data.pair.special_lj = force->special_lj;
  cuda->shared_data.pair.offset = offset;
  cuda->shared_data.pair.cut = cut_lj;
  cuda->shared_data.pair.cut_coul = cut_coul;
  cuda->shared_data.pair.coeff1 = rhoinv;
  cuda->shared_data.pair.coeff2 = buck1;
  cuda->shared_data.pair.coeff3 = buck2;
  cuda->shared_data.pair.coeff4 = a;
  cuda->shared_data.pair.coeff5 = c;
}
/* ---------------------------------------------------------------------- */
// Run the buck/coul/cut pair computation through the CUDA wrapper.
// eflag / vflag select energy and virial tallying.
void PairBuckCoulCutCuda::compute(int eflag, int vflag)
{
  MYDBG( printf("PairBuckCoulCutCuda compute start\n"); fflush(stdout);)

  if (eflag || vflag) ev_setup(eflag,vflag);

  // upload energy / virial accumulators that were requested
  if (eflag) {
    cuda->cu_eng_vdwl->upload();
    cuda->cu_eng_coul->upload();
  }
  if (vflag) cuda->cu_virial->upload();

  Cuda_PairBuckCoulCutCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);

  // download results right away unless collection is deferred
  if (!cuda->shared_data.pair.collect_forces_later) {
    if (eflag) {
      cuda->cu_eng_vdwl->download();
      cuda->cu_eng_coul->download();
    }
    if (vflag) cuda->cu_virial->download();
  }

  MYDBG( printf("PairBuckCoulCutCuda compute end\n"); fflush(stdout);)
}
/* ---------------------------------------------------------------------- */
void PairBuckCoulCutCuda::settings(int narg, char **arg)
{
PairBuckCoulCut::settings(narg, arg);
cuda->shared_data.pair.cut_coul_global = (F_FLOAT) cut_coul_global;
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
}
/* ---------------------------------------------------------------------- */
void PairBuckCoulCutCuda::coeff(int narg, char **arg)
{
PairBuckCoulCut::coeff(narg, arg);
allocate();
}
void PairBuckCoulCutCuda::init_style()
{
if (!atom->q_flag)
error->all("Pair style buck/coul/long requires atom attribute q");
// request regular or rRESPA neighbor lists
int irequest;
if (strcmp(update->integrate_style,"respa") == 0) error->all("Integrate Style Respa is not supported by pair style buck/coul/long/cuda");
irequest = neighbor->request(this);
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->cudable = 1;
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
cuda->shared_data.pair.cut_coulsq_global=cut_coul_global * cut_coul_global;
if(ncoultablebits) error->warning("# CUDA: You asked for the useage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n");
}
// Forward the neighbor list to the base class and, without binning,
// register list id 0 with the Cuda object.
void PairBuckCoulCutCuda::init_list(int id, NeighList *ptr)
{
  MYDBG(printf("# CUDA PairBuckCoulCutCuda::init_list\n");)
  PairBuckCoulCut::init_list(id, ptr);
#ifndef CUDA_USE_BINNING
  // right now only verlet (id 0) is handled, not respa;
  // see Neighbor::init() for details on lammps lists' logic
  if (id == 0) {
    cuda_neigh_list = cuda->registerNeighborList(ptr);
  }
#endif
  MYDBG(printf("# CUDA PairBuckCoulCutCuda::init_list end\n");)
}
void PairBuckCoulCutCuda::ev_setup(int eflag, int vflag)
{
int maxeatomold=maxeatom;
PairBuckCoulCut::ev_setup(eflag,vflag);
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
}

View File

@ -0,0 +1,57 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(buck/coul/cut/cuda,PairBuckCoulCutCuda)
#else
#ifndef LMP_PAIR_BUCK_COUL_CUT_CUDA_H
#define LMP_PAIR_BUCK_COUL_CUT_CUDA_H
#include "pair_buck_coul_cut.h"
// Declaration of the pair style registered as "buck/coul/cut/cuda"; it
// extends PairBuckCoulCut and routes compute() through a CUDA wrapper call.
namespace LAMMPS_NS {
class PairBuckCoulCutCuda : public PairBuckCoulCut
{
public:
PairBuckCoulCutCuda(class LAMMPS *);
void compute(int, int);
void settings(int, char **);
void coeff(int, char **);
void init_list(int, class NeighList *);
void init_style();
void ev_setup(int eflag, int vflag);
protected:
// taken from lmp->cuda in the constructor
class Cuda *cuda;
void allocate();
// true once allocate() has stored the coefficient array pointers in
// cuda->shared_data.pair
bool allocated2;
// assigned in init_list() for list id 0 when CUDA_USE_BINNING is not defined;
// not initialized by the constructor
class CudaNeighList* cuda_neigh_list;
};
}
#endif
#endif

View File

@ -0,0 +1,184 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
Contributing author: Paul Crozier (SNL)
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_buck_coul_long_cuda.h"
#include "pair_buck_coul_long_cuda_cu.h"
#include "cuda_data.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "kspace.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "cuda_neigh_list.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
/* ---------------------------------------------------------------------- */
// Construct on top of PairBuckCoulLong; requires an active Cuda instance.
PairBuckCoulLongCuda::PairBuckCoulLongCuda(LAMMPS *lmp) : PairBuckCoulLong(lmp)
{
  // coefficient pointers have not been handed to the CUDA shared data yet
  allocated2 = false;

  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  cuda->shared_data.pair.use_block_per_atom = 0;
  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
/* ----------------------------------------------------------------------
remember pointer to arrays in cuda shared data
------------------------------------------------------------------------- */
// Allocate the base-class arrays if needed, then (once) store pointers to
// them in the CUDA shared pair data.
void PairBuckCoulLongCuda::allocate()
{
  if (!allocated) PairBuckCoulLong::allocate();

  // pointers are registered only on the first call
  if (allocated2) return;
  allocated2 = true;

  cuda->shared_data.pair.special_coul = force->special_coul;
  cuda->shared_data.pair.special_lj = force->special_lj;
  cuda->shared_data.pair.offset = offset;
  cuda->shared_data.pair.cut = cut_lj;
  cuda->shared_data.pair.coeff1 = rhoinv;
  cuda->shared_data.pair.coeff2 = buck1;
  cuda->shared_data.pair.coeff3 = buck2;
  cuda->shared_data.pair.coeff4 = a;
  cuda->shared_data.pair.coeff5 = c;
}
/* ---------------------------------------------------------------------- */
// Run the buck/coul/long pair computation through the CUDA wrapper.
// eflag / vflag select energy and virial tallying.
void PairBuckCoulLongCuda::compute(int eflag, int vflag)
{
  MYDBG( printf("PairBuckCoulLongCuda compute start\n"); fflush(stdout);)

  if (eflag || vflag) ev_setup(eflag,vflag);

  // upload energy / virial accumulators that were requested
  if (eflag) {
    cuda->cu_eng_vdwl->upload();
    cuda->cu_eng_coul->upload();
  }
  if (vflag) cuda->cu_virial->upload();

  Cuda_PairBuckCoulLongCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);

  // download results right away unless collection is deferred
  if (!cuda->shared_data.pair.collect_forces_later) {
    if (eflag) {
      cuda->cu_eng_vdwl->download();
      cuda->cu_eng_coul->download();
    }
    if (vflag) cuda->cu_virial->download();
  }

  MYDBG( printf("PairBuckCoulLongCuda compute end\n"); fflush(stdout);)
}
/* ---------------------------------------------------------------------- */
void PairBuckCoulLongCuda::settings(int narg, char **arg)
{
PairBuckCoulLong::settings(narg, arg);
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
}
/* ---------------------------------------------------------------------- */
void PairBuckCoulLongCuda::coeff(int narg, char **arg)
{
PairBuckCoulLong::coeff(narg, arg);
allocate();
}
void PairBuckCoulLongCuda::init_style()
{
if (!atom->q_flag)
error->all("Pair style buck/coul/long requires atom attribute q");
// request regular or rRESPA neighbor lists
int irequest;
if (strcmp(update->integrate_style,"respa") == 0) error->all("Integrate Style Respa is not supported by pair style buck/coul/long/cuda");
irequest = neighbor->request(this);
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->cudable = 1;
cut_coulsq = cut_coul * cut_coul;
cuda->shared_data.pair.cut_coulsq_global=cut_coulsq;
if (force->kspace == NULL)
error->all("Pair style is incompatible with KSpace style");
g_ewald = force->kspace->g_ewald;
cuda->shared_data.pair.g_ewald=g_ewald;
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
if(ncoultablebits) error->warning("# CUDA: You asked for the useage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n");
}
// Forward the neighbor list to the base class and, without binning,
// register list id 0 with the Cuda object.
void PairBuckCoulLongCuda::init_list(int id, NeighList *ptr)
{
  MYDBG(printf("# CUDA PairBuckCoulLongCuda::init_list\n");)
  PairBuckCoulLong::init_list(id, ptr);
#ifndef CUDA_USE_BINNING
  // right now only verlet (id 0) is handled, not respa;
  // see Neighbor::init() for details on lammps lists' logic
  if (id == 0) {
    cuda_neigh_list = cuda->registerNeighborList(ptr);
  }
#endif
  MYDBG(printf("# CUDA PairBuckCoulLongCuda::init_list end\n");)
}
void PairBuckCoulLongCuda::ev_setup(int eflag, int vflag)
{
int maxeatomold=maxeatom;
PairBuckCoulLong::ev_setup(eflag,vflag);
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
}

View File

@ -0,0 +1,57 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(buck/coul/long/cuda,PairBuckCoulLongCuda)
#else
#ifndef LMP_PAIR_BUCK_COUL_LONG_CUDA_H
#define LMP_PAIR_BUCK_COUL_LONG_CUDA_H
#include "pair_buck_coul_long.h"
// Declaration of the pair style registered as "buck/coul/long/cuda"; it
// extends PairBuckCoulLong and routes compute() through a CUDA wrapper call.
namespace LAMMPS_NS {
class PairBuckCoulLongCuda : public PairBuckCoulLong
{
public:
PairBuckCoulLongCuda(class LAMMPS *);
void compute(int, int);
void settings(int, char **);
void coeff(int, char **);
void init_list(int, class NeighList *);
void init_style();
void ev_setup(int eflag, int vflag);
protected:
// taken from lmp->cuda in the constructor
class Cuda *cuda;
void allocate();
// true once allocate() has stored the coefficient array pointers in
// cuda->shared_data.pair
bool allocated2;
// assigned in init_list() for list id 0 when CUDA_USE_BINNING is not defined;
// not initialized by the constructor
class CudaNeighList* cuda_neigh_list;
};
}
#endif
#endif

View File

@ -0,0 +1,169 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
Contributing author: Paul Crozier (SNL)
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_buck_cuda.h"
#include "pair_buck_cuda_cu.h"
#include "cuda_data.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "kspace.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "cuda_neigh_list.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* ---------------------------------------------------------------------- */
// Construct on top of PairBuck; requires an active Cuda instance.
PairBuckCuda::PairBuckCuda(LAMMPS *lmp) : PairBuck(lmp)
{
  // coefficient pointers have not been handed to the CUDA shared data yet
  allocated2 = false;

  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  cuda->shared_data.pair.use_block_per_atom = 0;
  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
/* ----------------------------------------------------------------------
remember pointer to arrays in cuda shared data
------------------------------------------------------------------------- */
// Allocate the base-class arrays if needed, then (once) store pointers to
// them in the CUDA shared pair data.
void PairBuckCuda::allocate()
{
  if (!allocated) PairBuck::allocate();

  // pointers are registered only on the first call
  if (allocated2) return;
  allocated2 = true;

  cuda->shared_data.pair.special_lj = force->special_lj;
  cuda->shared_data.pair.offset = offset;
  cuda->shared_data.pair.cut = cut;
  cuda->shared_data.pair.coeff1 = rhoinv;
  cuda->shared_data.pair.coeff2 = buck1;
  cuda->shared_data.pair.coeff3 = buck2;
  cuda->shared_data.pair.coeff4 = a;
  cuda->shared_data.pair.coeff5 = c;
}
/* ---------------------------------------------------------------------- */
// Run the buck pair computation through the CUDA wrapper.
// eflag / vflag select energy and virial tallying.
void PairBuckCuda::compute(int eflag, int vflag)
{
  MYDBG( printf("PairBuckCuda compute start\n"); fflush(stdout);)

  if (eflag || vflag) ev_setup(eflag,vflag);

  // upload energy / virial accumulators that were requested
  if (eflag) {
    cuda->cu_eng_vdwl->upload();
    cuda->cu_eng_coul->upload();
  }
  if (vflag) cuda->cu_virial->upload();

  Cuda_PairBuckCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);

  // download results right away unless collection is deferred
  if (!cuda->shared_data.pair.collect_forces_later) {
    if (eflag) {
      cuda->cu_eng_vdwl->download();
      cuda->cu_eng_coul->download();
    }
    if (vflag) cuda->cu_virial->download();
  }

  MYDBG( printf("PairBuckCuda compute end\n"); fflush(stdout);)
}
/* ---------------------------------------------------------------------- */
void PairBuckCuda::settings(int narg, char **arg)
{
PairBuck::settings(narg, arg);
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_global;
}
/* ---------------------------------------------------------------------- */
void PairBuckCuda::coeff(int narg, char **arg)
{
PairBuck::coeff(narg, arg);
allocate();
}
void PairBuckCuda::init_style()
{
if (!atom->q_flag)
error->all("Pair style buck/coul/long requires atom attribute q");
// request regular or rRESPA neighbor lists
int irequest;
if (strcmp(update->integrate_style,"respa") == 0) error->all("Integrate Style Respa is not supported by pair style buck/coul/long/cuda");
irequest = neighbor->request(this);
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->cudable = 1;
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
if(ncoultablebits) error->warning("# CUDA: You asked for the useage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n");
}
// Hand the neighbor list to PairBuck and, unless CUDA_USE_BINNING is defined,
// register list id 0 with the Cuda object and keep the returned handle.
void PairBuckCuda::init_list(int id, NeighList *ptr)
{
  MYDBG(printf("# CUDA PairBuckCuda::init_list\n");)

  PairBuck::init_list(id, ptr);

#ifndef CUDA_USE_BINNING
  // only the verlet list (id 0) is handled, not respa;
  // see Neighbor::init() for details on lammps lists' logic
  if (0 == id) {
    cuda_neigh_list = cuda->registerNeighborList(ptr);
  }
#endif

  MYDBG(printf("# CUDA PairBuckCuda::init_list end\n");)
}
void PairBuckCuda::ev_setup(int eflag, int vflag)
{
int maxeatomold=maxeatom;
PairBuck::ev_setup(eflag,vflag);
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
}

View File

@ -0,0 +1,57 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(buck/cuda,PairBuckCuda)
#else
#ifndef LMP_PAIR_BUCK_CUDA_H
#define LMP_PAIR_BUCK_CUDA_H
#include "pair_buck.h"
namespace LAMMPS_NS {
// CUDA variant of PairBuck; this header registers it as pair style
// "buck/cuda" through the PairStyle() entry.
class PairBuckCuda : public PairBuck
{
public:
// takes the Cuda object from the LAMMPS instance and flags the pair force as cudable
PairBuckCuda(class LAMMPS *);
// (eflag, vflag): uploads accumulators, calls Cuda_PairBuckCuda, downloads results
void compute(int, int);
// forwards to PairBuck::settings and mirrors cut_global into the shared pair data
void settings(int, char **);
// forwards to PairBuck::coeff, then calls allocate()
void coeff(int, char **);
// forwards to PairBuck::init_list and registers list id 0 with the Cuda object
void init_list(int, class NeighList *);
// requests a full, cudable neighbor list; rejects the respa integrator
void init_style();
// forwards to PairBuck::ev_setup and re-creates cu_eatom / cu_vatom when atom->nmax grew
void ev_setup(int eflag, int vflag);
protected:
// copied from lmp->cuda in the constructor
class Cuda *cuda;
// allocates the PairBuck arrays if needed and records them in cuda->shared_data.pair
void allocate();
// true once allocate() has recorded the array pointers
bool allocated2;
// handle returned by cuda->registerNeighborList() in init_list()
class CudaNeighList* cuda_neigh_list;
};
}
#endif
#endif

View File

@ -0,0 +1,204 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Paul Crozier (SNL)
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_cg_cmm_coul_cut_cuda.h"
#include "pair_cg_cmm_coul_cut_cuda_cu.h"
#include "cuda_data.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "cuda_neigh_list.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* ---------------------------------------------------------------------- */
// Build the CUDA cg/cmm/coul/cut pair style on top of PairCGCMMCoulCut.
// Takes the Cuda object from the LAMMPS instance, reports a missing one via
// error->all(), marks the pair force as cudable and calls
// cuda->setSystemParams().
PairCGCMMCoulCutCuda::PairCGCMMCoulCutCuda(LAMMPS *lmp) : PairCGCMMCoulCut(lmp)
{
  cuda = lmp->cuda;
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  allocated2 = false;
  cg_type_double = NULL;
  // give the pointer a defined value; init_list() assigns the registered
  // list for id 0 (only when CUDA_USE_BINNING is not defined)
  cuda_neigh_list = NULL;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
/* ----------------------------------------------------------------------
   make sure the base class arrays exist, record the coefficient arrays
   in the cuda shared pair data (first call only) and refresh the double
   copy of cg_type on every call
------------------------------------------------------------------------- */
void PairCGCMMCoulCutCuda::allocate()
{
  if (!allocated) PairCGCMMCoulCut::allocate();

  const int ntypes = atom->ntypes;

  if (!allocated2) {
    allocated2 = true;

    // the double table must exist before its address is published as coeff5
    memory->create(cg_type_double,ntypes+1,ntypes+1,"paircg:cgtypedouble");

    cuda->shared_data.pair.cut      = cut_lj;
    cuda->shared_data.pair.cut_coul = cut_coul;
    cuda->shared_data.pair.offset   = offset;

    cuda->shared_data.pair.coeff1 = lj1;
    cuda->shared_data.pair.coeff2 = lj2;
    cuda->shared_data.pair.coeff3 = lj3;
    cuda->shared_data.pair.coeff4 = lj4;
    cuda->shared_data.pair.coeff5 = cg_type_double;

    cuda->shared_data.pair.special_lj   = force->special_lj;
    cuda->shared_data.pair.special_coul = force->special_coul;
  }

  // symmetric fill: only the row <= col entries of cg_type are read
  for (int row = 1; row <= ntypes; row++) {
    for (int col = 1; col <= ntypes; col++) {
      cg_type_double[row][col] = (row <= col) ? cg_type[row][col] : cg_type[col][row];
    }
  }
}
/* ---------------------------------------------------------------------- */
// Launch the pair computation through Cuda_PairCGCMMCoulCutCuda().
// eflag gates the transfers of the energy accumulators, vflag those of the
// virial accumulator; either one triggers ev_setup() first.
void PairCGCMMCoulCutCuda::compute(int eflag, int vflag)
{
  if (eflag || vflag) ev_setup(eflag,vflag);

  // upload the accumulators before the pair routine runs
  if (eflag) {
    cuda->cu_eng_vdwl->upload();
    cuda->cu_eng_coul->upload();
  }
  if (vflag) cuda->cu_virial->upload();

  Cuda_PairCGCMMCoulCutCuda(&cuda->shared_data, &cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);

  // download immediately unless collection has been deferred
  const bool collect_now = !cuda->shared_data.pair.collect_forces_later;
  if (collect_now) {
    if (eflag) {
      cuda->cu_eng_vdwl->download();
      cuda->cu_eng_coul->download();
    }
    if (vflag) cuda->cu_virial->download();
  }
}
/* ---------------------------------------------------------------------- */
void PairCGCMMCoulCutCuda::settings(int narg, char **arg)
{
PairCGCMMCoulCut::settings(narg, arg);
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
cuda->shared_data.pair.cut_coul_global = (F_FLOAT) cut_coul_global;
cuda->shared_data.pair.kappa = (F_FLOAT) kappa;
}
/* ---------------------------------------------------------------------- */
void PairCGCMMCoulCutCuda::coeff(int narg, char **arg)
{
PairCGCMMCoulCut::coeff(narg, arg);
allocate();
}
void PairCGCMMCoulCutCuda::init_style()
{
MYDBG(printf("# CUDA PairCGCMMCoulCutCuda::init_style start\n"); )
// request regular or rRESPA neighbor lists
int irequest;
if (update->whichflag == 0 && strcmp(update->integrate_style,"respa") == 0) {
}
else
{
irequest = neighbor->request(this);
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->cudable = 1;
//neighbor->style=0; //0=NSQ neighboring
}
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
cut_respa=NULL;
if (force->newton) error->warning("Pair style uses does not use \"newton\" setting. You might test if \"newton off\" makes the simulation run faster.");
MYDBG(printf("# CUDA PairCGCMMCoulCutCuda::init_style end\n"); )
}
// Hand the neighbor list to PairCGCMMCoulCut and, unless CUDA_USE_BINNING is
// defined, register list id 0 with the Cuda object and keep the handle.
void PairCGCMMCoulCutCuda::init_list(int id, NeighList *ptr)
{
  MYDBG(printf("# CUDA PairCGCMMCoulCutCuda::init_list\n");)

  PairCGCMMCoulCut::init_list(id, ptr);

#ifndef CUDA_USE_BINNING
  // only the verlet list (id 0) is handled, not respa;
  // see Neighbor::init() for details on lammps lists' logic
  if (0 == id) {
    cuda_neigh_list = cuda->registerNeighborList(ptr);
  }
#endif

  MYDBG(printf("# CUDA PairCGCMMCoulCutCuda::init_list end\n");)
}
void PairCGCMMCoulCutCuda::ev_setup(int eflag, int vflag)
{
int maxeatomold=maxeatom;
PairCGCMMCoulCut::ev_setup(eflag,vflag);
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
}

View File

@ -0,0 +1,58 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(cg/cmm/coul/cut/cuda,PairCGCMMCoulCutCuda)
#else
#ifndef PAIR_CG_CMM_COUL_CUT_CUDA_H
#define PAIR_CG_CMM_COUL_CUT_CUDA_H
#include "pair_cg_cmm_coul_cut.h"
namespace LAMMPS_NS {
// CUDA variant of PairCGCMMCoulCut; this header registers it as pair style
// "cg/cmm/coul/cut/cuda" through the PairStyle() entry.
class PairCGCMMCoulCutCuda : public PairCGCMMCoulCut
{
public:
// takes the Cuda object from the LAMMPS instance and flags the pair force as cudable
PairCGCMMCoulCutCuda(class LAMMPS *);
// (eflag, vflag): uploads accumulators, calls Cuda_PairCGCMMCoulCutCuda, downloads results
void compute(int, int);
// forwards to the base class and mirrors the global cutoffs and kappa into the shared pair data
void settings(int, char **);
// forwards to the base class, then calls allocate()
void coeff(int, char **);
// forwards to the base class and registers list id 0 with the Cuda object
void init_list(int, class NeighList *);
// requests a full, cudable neighbor list and copies qqrd2e into the shared data
void init_style();
// forwards to the base class and re-creates cu_eatom / cu_vatom when atom->nmax grew
void ev_setup(int eflag, int vflag);
protected:
// copied from lmp->cuda in the constructor
class Cuda *cuda;
// allocates base arrays if needed, records them in cuda->shared_data.pair, refreshes cg_type_double
void allocate();
// true once allocate() has recorded the array pointers
bool allocated2;
// handle returned by cuda->registerNeighborList() in init_list()
class CudaNeighList* cuda_neigh_list;
// double-valued copy of cg_type, created in allocate() and published as coeff5
// NOTE(review): no matching destroy is visible in this file - confirm ownership
double** cg_type_double;
};
}
#endif
#endif

View File

@ -0,0 +1,204 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Paul Crozier (SNL)
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_cg_cmm_coul_debye_cuda.h"
#include "pair_cg_cmm_coul_debye_cuda_cu.h"
#include "cuda_data.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "cuda_neigh_list.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* ---------------------------------------------------------------------- */
// Build the CUDA cg/cmm/coul/debye pair style on top of PairCGCMMCoulCut.
// Takes the Cuda object from the LAMMPS instance, reports a missing one via
// error->all(), marks the pair force as cudable and calls
// cuda->setSystemParams().
PairCGCMMCoulDebyeCuda::PairCGCMMCoulDebyeCuda(LAMMPS *lmp) : PairCGCMMCoulCut(lmp)
{
  cuda = lmp->cuda;
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  allocated2 = false;
  cg_type_double = NULL;
  // give the pointer a defined value; init_list() assigns the registered
  // list for id 0 (only when CUDA_USE_BINNING is not defined)
  cuda_neigh_list = NULL;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
/* ----------------------------------------------------------------------
   make sure the base class arrays exist, record the coefficient arrays
   in the cuda shared pair data (first call only) and refresh the double
   copy of cg_type on every call
------------------------------------------------------------------------- */
void PairCGCMMCoulDebyeCuda::allocate()
{
  if (!allocated) PairCGCMMCoulCut::allocate();

  const int ntypes = atom->ntypes;

  if (!allocated2) {
    allocated2 = true;

    // the double table must exist before its address is published as coeff5
    memory->create(cg_type_double,ntypes+1,ntypes+1,"paircg:cgtypedouble");

    cuda->shared_data.pair.cut      = cut_lj;
    cuda->shared_data.pair.cut_coul = cut_coul;
    cuda->shared_data.pair.offset   = offset;

    cuda->shared_data.pair.coeff1 = lj1;
    cuda->shared_data.pair.coeff2 = lj2;
    cuda->shared_data.pair.coeff3 = lj3;
    cuda->shared_data.pair.coeff4 = lj4;
    cuda->shared_data.pair.coeff5 = cg_type_double;

    cuda->shared_data.pair.special_lj   = force->special_lj;
    cuda->shared_data.pair.special_coul = force->special_coul;
  }

  // symmetric fill: only the row <= col entries of cg_type are read
  for (int row = 1; row <= ntypes; row++) {
    for (int col = 1; col <= ntypes; col++) {
      cg_type_double[row][col] = (row <= col) ? cg_type[row][col] : cg_type[col][row];
    }
  }
}
/* ---------------------------------------------------------------------- */
// Launch the pair computation through Cuda_PairCGCMMCoulDebyeCuda().
// eflag gates the transfers of the energy accumulators, vflag those of the
// virial accumulator; either one triggers ev_setup() first.
void PairCGCMMCoulDebyeCuda::compute(int eflag, int vflag)
{
  if (eflag || vflag) ev_setup(eflag,vflag);

  // upload the accumulators before the pair routine runs
  if (eflag) {
    cuda->cu_eng_vdwl->upload();
    cuda->cu_eng_coul->upload();
  }
  if (vflag) cuda->cu_virial->upload();

  Cuda_PairCGCMMCoulDebyeCuda(&cuda->shared_data, &cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);

  // download immediately unless collection has been deferred
  const bool collect_now = !cuda->shared_data.pair.collect_forces_later;
  if (collect_now) {
    if (eflag) {
      cuda->cu_eng_vdwl->download();
      cuda->cu_eng_coul->download();
    }
    if (vflag) cuda->cu_virial->download();
  }
}
/* ---------------------------------------------------------------------- */
void PairCGCMMCoulDebyeCuda::settings(int narg, char **arg)
{
PairCGCMMCoulCut::settings(narg, arg);
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
cuda->shared_data.pair.cut_coul_global = (F_FLOAT) cut_coul_global;
cuda->shared_data.pair.kappa = (F_FLOAT) kappa;
}
/* ---------------------------------------------------------------------- */
void PairCGCMMCoulDebyeCuda::coeff(int narg, char **arg)
{
PairCGCMMCoulCut::coeff(narg, arg);
allocate();
}
void PairCGCMMCoulDebyeCuda::init_style()
{
MYDBG(printf("# CUDA PairCGCMMCoulDebyeCuda::init_style start\n"); )
// request regular or rRESPA neighbor lists
int irequest;
if (update->whichflag == 0 && strcmp(update->integrate_style,"respa") == 0) {
}
else
{
irequest = neighbor->request(this);
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->cudable = 1;
//neighbor->style=0; //0=NSQ neighboring
}
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
cut_respa=NULL;
if (force->newton) error->warning("Pair style uses does not use \"newton\" setting. You might test if \"newton off\" makes the simulation run faster.");
MYDBG(printf("# CUDA PairCGCMMCoulDebyeCuda::init_style end\n"); )
}
// Hand the neighbor list to PairCGCMMCoulCut and, unless CUDA_USE_BINNING is
// defined, register list id 0 with the Cuda object and keep the handle.
void PairCGCMMCoulDebyeCuda::init_list(int id, NeighList *ptr)
{
  MYDBG(printf("# CUDA PairCGCMMCoulDebyeCuda::init_list\n");)

  PairCGCMMCoulCut::init_list(id, ptr);

#ifndef CUDA_USE_BINNING
  // only the verlet list (id 0) is handled, not respa;
  // see Neighbor::init() for details on lammps lists' logic
  if (0 == id) {
    cuda_neigh_list = cuda->registerNeighborList(ptr);
  }
#endif

  MYDBG(printf("# CUDA PairCGCMMCoulDebyeCuda::init_list end\n");)
}
void PairCGCMMCoulDebyeCuda::ev_setup(int eflag, int vflag)
{
int maxeatomold=maxeatom;
PairCGCMMCoulCut::ev_setup(eflag,vflag);
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
}

View File

@ -0,0 +1,58 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(cg/cmm/coul/debye/cuda,PairCGCMMCoulDebyeCuda)
#else
#ifndef PAIR_CG_CMM_COUL_DEBYE_CUDA_H
#define PAIR_CG_CMM_COUL_DEBYE_CUDA_H
#include "pair_cg_cmm_coul_cut.h"
namespace LAMMPS_NS {
// CUDA pair style registered by this header as "cg/cmm/coul/debye/cuda"
// through the PairStyle() entry; it derives from PairCGCMMCoulCut.
class PairCGCMMCoulDebyeCuda : public PairCGCMMCoulCut
{
public:
// takes the Cuda object from the LAMMPS instance and flags the pair force as cudable
PairCGCMMCoulDebyeCuda(class LAMMPS *);
// (eflag, vflag): uploads accumulators, calls Cuda_PairCGCMMCoulDebyeCuda, downloads results
void compute(int, int);
// forwards to the base class and mirrors the global cutoffs and kappa into the shared pair data
void settings(int, char **);
// forwards to the base class, then calls allocate()
void coeff(int, char **);
// forwards to the base class and registers list id 0 with the Cuda object
void init_list(int, class NeighList *);
// requests a full, cudable neighbor list and copies qqrd2e into the shared data
void init_style();
// forwards to the base class and re-creates cu_eatom / cu_vatom when atom->nmax grew
void ev_setup(int eflag, int vflag);
protected:
// copied from lmp->cuda in the constructor
class Cuda *cuda;
// allocates base arrays if needed, records them in cuda->shared_data.pair, refreshes cg_type_double
void allocate();
// true once allocate() has recorded the array pointers
bool allocated2;
// handle returned by cuda->registerNeighborList() in init_list()
class CudaNeighList* cuda_neigh_list;
// double-valued copy of cg_type, created in allocate() and published as coeff5
// NOTE(review): no matching destroy is visible in this file - confirm ownership
double** cg_type_double;
};
}
#endif
#endif

View File

@ -0,0 +1,206 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Paul Crozier (SNL)
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_cg_cmm_coul_long_cuda.h"
#include "pair_cg_cmm_coul_long_cuda_cu.h"
#include "cuda_data.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "kspace.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "cuda_neigh_list.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* ---------------------------------------------------------------------- */
// Build the CUDA cg/cmm/coul/long pair style on top of PairCGCMMCoulLong.
// Takes the Cuda object from the LAMMPS instance, reports a missing one via
// error->all(), marks the pair force as cudable and calls
// cuda->setSystemParams().
PairCGCMMCoulLongCuda::PairCGCMMCoulLongCuda(LAMMPS *lmp) : PairCGCMMCoulLong(lmp)
{
  cuda = lmp->cuda;
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  allocated2 = false;
  cg_type_double = NULL;
  // give the pointer a defined value; init_list() assigns the registered
  // list for id 0 (only when CUDA_USE_BINNING is not defined)
  cuda_neigh_list = NULL;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
/* ----------------------------------------------------------------------
   make sure the base class arrays exist, record the coefficient arrays
   in the cuda shared pair data (first call only) and refresh the double
   copy of cg_type on every call
------------------------------------------------------------------------- */
void PairCGCMMCoulLongCuda::allocate()
{
  if (!allocated) PairCGCMMCoulLong::allocate();

  const int ntypes = atom->ntypes;

  if (!allocated2) {
    allocated2 = true;

    // the double table must exist before its address is published as coeff5
    memory->create(cg_type_double,ntypes+1,ntypes+1,"paircg:cgtypedouble");

    cuda->shared_data.pair.cut      = cut_lj;
    cuda->shared_data.pair.cut_coul = cut_coul;
    cuda->shared_data.pair.offset   = offset;

    cuda->shared_data.pair.coeff1 = lj1;
    cuda->shared_data.pair.coeff2 = lj2;
    cuda->shared_data.pair.coeff3 = lj3;
    cuda->shared_data.pair.coeff4 = lj4;
    cuda->shared_data.pair.coeff5 = cg_type_double;

    cuda->shared_data.pair.special_lj   = force->special_lj;
    cuda->shared_data.pair.special_coul = force->special_coul;
  }

  // symmetric fill: only the row <= col entries of cg_type are read
  for (int row = 1; row <= ntypes; row++) {
    for (int col = 1; col <= ntypes; col++) {
      cg_type_double[row][col] = (row <= col) ? cg_type[row][col] : cg_type[col][row];
    }
  }
}
/* ---------------------------------------------------------------------- */
// Launch the pair computation through Cuda_PairCGCMMCoulLongCuda().
// eflag gates the transfers of the energy accumulators, vflag those of the
// virial accumulator; either one triggers ev_setup() first.
void PairCGCMMCoulLongCuda::compute(int eflag, int vflag)
{
  if (eflag || vflag) ev_setup(eflag,vflag);

  // upload the accumulators before the pair routine runs
  if (eflag) {
    cuda->cu_eng_vdwl->upload();
    cuda->cu_eng_coul->upload();
  }
  if (vflag) cuda->cu_virial->upload();

  Cuda_PairCGCMMCoulLongCuda(&cuda->shared_data, &cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);

  // download immediately unless collection has been deferred
  const bool collect_now = !cuda->shared_data.pair.collect_forces_later;
  if (collect_now) {
    if (eflag) {
      cuda->cu_eng_vdwl->download();
      cuda->cu_eng_coul->download();
    }
    if (vflag) cuda->cu_virial->download();
  }
}
/* ---------------------------------------------------------------------- */
void PairCGCMMCoulLongCuda::settings(int narg, char **arg)
{
PairCGCMMCoulLong::settings(narg, arg);
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
cuda->shared_data.pair.cut_coul_global = (F_FLOAT) cut_coul_global;
cuda->shared_data.pair.kappa = (F_FLOAT) kappa;
}
/* ---------------------------------------------------------------------- */
void PairCGCMMCoulLongCuda::coeff(int narg, char **arg)
{
PairCGCMMCoulLong::coeff(narg, arg);
allocate();
}
void PairCGCMMCoulLongCuda::init_style()
{
MYDBG(printf("# CUDA PairCGCMMCoulLongCuda::init_style start\n"); )
// request regular or rRESPA neighbor lists
int irequest;
if (update->whichflag == 0 && strcmp(update->integrate_style,"respa") == 0) {
}
else
{
irequest = neighbor->request(this);
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->cudable = 1;
//neighbor->style=0; //0=NSQ neighboring
}
g_ewald = force->kspace->g_ewald;
cuda->shared_data.pair.g_ewald=g_ewald;
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
cut_respa=NULL;
if (force->newton) error->warning("Pair style uses does not use \"newton\" setting. You might test if \"newton off\" makes the simulation run faster.");
MYDBG(printf("# CUDA PairCGCMMCoulLongCuda::init_style end\n"); )
}
// Hand the neighbor list to PairCGCMMCoulLong and, unless CUDA_USE_BINNING is
// defined, register list id 0 with the Cuda object and keep the handle.
void PairCGCMMCoulLongCuda::init_list(int id, NeighList *ptr)
{
  MYDBG(printf("# CUDA PairCGCMMCoulLongCuda::init_list\n");)

  PairCGCMMCoulLong::init_list(id, ptr);

#ifndef CUDA_USE_BINNING
  // only the verlet list (id 0) is handled, not respa;
  // see Neighbor::init() for details on lammps lists' logic
  if (0 == id) {
    cuda_neigh_list = cuda->registerNeighborList(ptr);
  }
#endif

  MYDBG(printf("# CUDA PairCGCMMCoulLongCuda::init_list end\n");)
}
void PairCGCMMCoulLongCuda::ev_setup(int eflag, int vflag)
{
int maxeatomold=maxeatom;
PairCGCMMCoulLong::ev_setup(eflag,vflag);
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
}

View File

@ -0,0 +1,58 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(cg/cmm/coul/long/cuda,PairCGCMMCoulLongCuda)
#else
#ifndef PAIR_CG_CMM_COUL_LONG_CUDA_H
#define PAIR_CG_CMM_COUL_LONG_CUDA_H
#include "pair_cg_cmm_coul_long.h"
namespace LAMMPS_NS {
// CUDA variant of PairCGCMMCoulLong; this header registers it as pair style
// "cg/cmm/coul/long/cuda" through the PairStyle() entry.
class PairCGCMMCoulLongCuda : public PairCGCMMCoulLong
{
public:
// takes the Cuda object from the LAMMPS instance and flags the pair force as cudable
PairCGCMMCoulLongCuda(class LAMMPS *);
// (eflag, vflag): uploads accumulators, calls Cuda_PairCGCMMCoulLongCuda, downloads results
void compute(int, int);
// forwards to the base class and mirrors the global cutoffs and kappa into the shared pair data
void settings(int, char **);
// forwards to the base class, then calls allocate()
void coeff(int, char **);
// forwards to the base class and registers list id 0 with the Cuda object
void init_list(int, class NeighList *);
// requests a full, cudable neighbor list; copies g_ewald and qqrd2e into the shared data
void init_style();
// forwards to the base class and re-creates cu_eatom / cu_vatom when atom->nmax grew
void ev_setup(int eflag, int vflag);
protected:
// copied from lmp->cuda in the constructor
class Cuda *cuda;
// allocates base arrays if needed, records them in cuda->shared_data.pair, refreshes cg_type_double
void allocate();
// true once allocate() has recorded the array pointers
bool allocated2;
// handle returned by cuda->registerNeighborList() in init_list()
class CudaNeighList* cuda_neigh_list;
// double-valued copy of cg_type, created in allocate() and published as coeff5
// NOTE(review): no matching destroy is visible in this file - confirm ownership
double** cg_type_double;
};
}
#endif
#endif

View File

@ -0,0 +1,201 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Paul Crozier (SNL)
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_cg_cmm_cuda.h"
#include "pair_cg_cmm_cuda_cu.h"
#include "cuda_data.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "cuda_neigh_list.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* ---------------------------------------------------------------------- */
// Build the CUDA cg/cmm pair style on top of PairCGCMM.
// Takes the Cuda object from the LAMMPS instance, reports a missing one via
// error->all(), marks the pair force as cudable and calls
// cuda->setSystemParams().
PairCGCMMCuda::PairCGCMMCuda(LAMMPS *lmp) : PairCGCMM(lmp)
{
  cuda = lmp->cuda;
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  allocated2 = false;
  cg_type_double = NULL;
  // give the pointer a defined value; init_list() assigns the registered
  // list for id 0 (only when CUDA_USE_BINNING is not defined)
  cuda_neigh_list = NULL;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
/* ----------------------------------------------------------------------
   make sure the base class arrays exist, record the coefficient arrays
   in the cuda shared pair data (first call only) and refresh the double
   copy of cg_type on every call
------------------------------------------------------------------------- */
void PairCGCMMCuda::allocate()
{
  if (!allocated) PairCGCMM::allocate();

  const int ntypes = atom->ntypes;

  if (!allocated2) {
    allocated2 = true;

    // the double table must exist before its address is published as coeff5
    memory->create(cg_type_double,ntypes+1,ntypes+1,"paircg:cgtypedouble");

    cuda->shared_data.pair.cut    = cut;
    cuda->shared_data.pair.offset = offset;

    cuda->shared_data.pair.coeff1 = lj1;
    cuda->shared_data.pair.coeff2 = lj2;
    cuda->shared_data.pair.coeff3 = lj3;
    cuda->shared_data.pair.coeff4 = lj4;
    cuda->shared_data.pair.coeff5 = cg_type_double;

    /*cu_lj1_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm, (atom->ntypes+1)*(atom->ntypes+1));
    cu_lj2_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm, (atom->ntypes+1)*(atom->ntypes+1));
    cu_lj3_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm, (atom->ntypes+1)*(atom->ntypes+1));
    cu_lj4_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm, (atom->ntypes+1)*(atom->ntypes+1));
    cu_cg_type_double_gm = new cCudaData<double, F_FLOAT, x> ((double*)cg_type_double, &cuda->shared_data.pair.coeff5_gm, (atom->ntypes+1)*(atom->ntypes+1));*/

    cuda->shared_data.pair.special_lj = force->special_lj;
  }

  // symmetric fill: only the row <= col entries of cg_type are read
  for (int row = 1; row <= ntypes; row++) {
    for (int col = 1; col <= ntypes; col++) {
      cg_type_double[row][col] = (row <= col) ? cg_type[row][col] : cg_type[col][row];
    }
  }
}
/* ---------------------------------------------------------------------- */
// Launch the pair computation through Cuda_PairCGCMMCuda().
// eflag gates the transfers of the vdwl energy accumulator, vflag those of
// the virial accumulator; either one triggers ev_setup() first.
void PairCGCMMCuda::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  }

  // upload the accumulators before the pair routine runs
  if (eflag) cuda->cu_eng_vdwl->upload();
  if (vflag) cuda->cu_virial->upload();

  Cuda_PairCGCMMCuda(&cuda->shared_data, &cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);

  // download immediately unless collection has been deferred
  const bool collect_now = !cuda->shared_data.pair.collect_forces_later;
  if (collect_now) {
    if (eflag) cuda->cu_eng_vdwl->download();
    if (vflag) cuda->cu_virial->download();
  }
}
/* ---------------------------------------------------------------------- */
void PairCGCMMCuda::settings(int narg, char **arg)
{
PairCGCMM::settings(narg, arg);
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
}
/* ---------------------------------------------------------------------- */
void PairCGCMMCuda::coeff(int narg, char **arg)
{
PairCGCMM::coeff(narg, arg);
allocate();
}
void PairCGCMMCuda::init_style()
{
MYDBG(printf("# CUDA PairCGCMMCuda::init_style start\n"); )
// request regular or rRESPA neighbor lists
int irequest;
if (update->whichflag == 0 && strcmp(update->integrate_style,"respa") == 0) {
}
else
{
irequest = neighbor->request(this);
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->cudable = 1;
//neighbor->style=0; //0=NSQ neighboring
}
cut_respa=NULL;
MYDBG(printf("# CUDA PairCGCMMCuda::init_style end\n"); )
}
void PairCGCMMCuda::init_list(int id, NeighList *ptr)
{
  MYDBG(printf("# CUDA PairCGCMMCuda::init_list\n");)

  PairCGCMM::init_list(id, ptr);

#ifndef CUDA_USE_BINNING
  // Only the verlet list (id 0) is handled for now, not respa lists.
  // Neighbor::init() describes the logic behind the LAMMPS list ids.
  if (id == 0) {
    cuda_neigh_list = cuda->registerNeighborList(ptr);
  }
#endif

  MYDBG(printf("# CUDA PairCGCMMCuda::init_list end\n");)
}
void PairCGCMMCuda::ev_setup(int eflag, int vflag)
{
int maxeatomold=maxeatom;
PairCGCMM::ev_setup(eflag,vflag);
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
}

View File

@ -0,0 +1,64 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(cg/cmm/cuda,PairCGCMMCuda)
#else
#ifndef PAIR_CG_CMM_CUDA_H
#define PAIR_CG_CMM_CUDA_H
#include "pair_cg_cmm.h"
#include "cuda_data.h"
namespace LAMMPS_NS {
// Pair style "cg/cmm/cuda": PairCGCMM variant whose compute() calls
// Cuda_PairCGCMMCuda() instead of the host force loop.
class PairCGCMMCuda : public PairCGCMM
{
public:
PairCGCMMCuda(class LAMMPS *);
// uploads the energy/virial accumulators, runs the CUDA pair routine and
// downloads the results unless collect_forces_later is set
void compute(int, int);
// base-class settings(), then stores cut_lj_global as pair.cut_global
void settings(int, char **);
// base-class coeff(), then allocate()
void coeff(int, char **);
// base-class init_list(); registers list id 0 with the Cuda object
// unless CUDA_USE_BINNING is defined
void init_list(int, class NeighList *);
// requests a full, cudable neighbor list (except for the respa/whichflag==0 case)
void init_style();
// base-class ev_setup(), then recreates the per-atom cCudaData wrappers
void ev_setup(int eflag, int vflag);
protected:
// Cuda object that owns shared_data and the global cCudaData buffers
class Cuda *cuda;
void allocate();
// NOTE(review): not referenced by the methods visible in this chunk -- confirm its use
bool allocated2;
// set by init_list(); its sneighlist member is passed to the CUDA pair routine
class CudaNeighList* cuda_neigh_list;
// double-valued symmetric copy of cg_type, published as pair.coeff5
double** cg_type_double;
// NOTE(review): the only visible use of these pointers is in commented-out
// code in allocate() -- confirm whether they are still needed
cCudaData<double , F_FLOAT , x >* cu_lj1_gm;
cCudaData<double , F_FLOAT , x >* cu_lj2_gm;
cCudaData<double , F_FLOAT , x >* cu_lj3_gm;
cCudaData<double , F_FLOAT , x >* cu_lj4_gm;
cCudaData<double , F_FLOAT , x >* cu_cg_type_double_gm;
};
}
#endif
#endif

View File

@ -0,0 +1,326 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Stephen Foiles (SNL), Murray Daw (SNL)
------------------------------------------------------------------------- */
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_eam_alloy_cuda.h"
#include "atom.h"
#include "comm.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
#define MAXLINE 1024
/* ---------------------------------------------------------------------- */
PairEAMAlloyCuda::PairEAMAlloyCuda(LAMMPS *lmp) : PairEAMCuda(lmp)
{
  // A /cuda style is unusable without the Cuda object on the LAMMPS instance.
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  one_coeff = 1;
}
/* ----------------------------------------------------------------------
set coeffs for one or more type pairs
read DYNAMO setfl file
------------------------------------------------------------------------- */
void PairEAMAlloyCuda::coeff(int narg, char **arg)
{
  if (!allocated) allocate();

  // expected arguments: * * <file> followed by one name per atom type
  if (narg != 3 + atom->ntypes)
    error->all("Incorrect args for pair coefficients");

  // insure I,J args are * *
  const bool both_wildcards =
    (strcmp(arg[0],"*") == 0) && (strcmp(arg[1],"*") == 0);
  if (!both_wildcards)
    error->all("Incorrect args for pair coefficients");

  // discard a previously read setfl file before reading the new one
  if (setfl) {
    for (int e = 0; e < setfl->nelements; e++) delete [] setfl->elements[e];
    delete [] setfl->elements;
    delete [] setfl->mass;
    memory->destroy(setfl->frho);
    memory->destroy(setfl->rhor);
    memory->destroy(setfl->z2r);
    delete setfl;
  }
  setfl = new Setfl();
  read_file(arg[2]);

  // map each atom type to an element of the potential file
  // map[type] = element index, or -1 when the argument is NULL
  for (int iarg = 3; iarg < narg; iarg++) {
    const int itype = iarg - 2;

    if (strcmp(arg[iarg],"NULL") == 0) {
      map[itype] = -1;
      continue;
    }

    int ielem = 0;
    while (ielem < setfl->nelements &&
           strcmp(arg[iarg],setfl->elements[ielem]) != 0) ielem++;

    if (ielem < setfl->nelements) map[itype] = ielem;
    else error->all("No matching element in EAM potential file");
  }

  // clear setflag since coeff() called once with I,J = * *
  const int ntypes = atom->ntypes;
  for (int i = 1; i <= ntypes; i++)
    for (int j = i; j <= ntypes; j++)
      setflag[i][j] = 0;

  // flag every type pair whose members are both mapped to an element
  // and set the mass of a type from the file when i == j
  int count = 0;
  for (int i = 1; i <= ntypes; i++) {
    for (int j = i; j <= ntypes; j++) {
      if (map[i] < 0 || map[j] < 0) continue;
      setflag[i][j] = 1;
      if (i == j) atom->set_mass(i,setfl->mass[map[i]]);
      count++;
    }
  }

  if (count == 0) error->all("Incorrect args for pair coefficients");
}
/* ----------------------------------------------------------------------
read a multi-element DYNAMO setfl file
------------------------------------------------------------------------- */
/* layout consumed below:
     lines 1-3 : skipped
     line 4    : nelements followed by one name per element
     line 5    : nrho drho nr dr cut
     per element: one line "<int> <mass>", then nrho frho values,
                  then nr rhor values
     finally   : nr z2r values for every element pair (i,j) with j <= i
   rank 0 does all file I/O; every value is broadcast to the other ranks
   filename = path of the potential file (only opened on rank 0)        */
void PairEAMAlloyCuda::read_file(char *filename)
{
  Setfl *file = setfl;

  // open potential file

  int me = comm->me;
  FILE *fptr = NULL;
  char line[MAXLINE];

  if (me == 0) {
    fptr = fopen(filename,"r");
    if (fptr == NULL) {
      char str[128];
      // bounded formatting: a long file name must not overrun str
      snprintf(str,sizeof(str),"Cannot open EAM potential file %s",filename);
      error->one(str);
    }
  }

  // read and broadcast header
  // extract element names from nelements line

  int n;
  if (me == 0) {
    fgets(line,MAXLINE,fptr);
    fgets(line,MAXLINE,fptr);
    fgets(line,MAXLINE,fptr);
    fgets(line,MAXLINE,fptr);
    n = strlen(line) + 1;
  }
  MPI_Bcast(&n,1,MPI_INT,0,world);
  MPI_Bcast(line,n,MPI_CHAR,0,world);

  sscanf(line,"%d",&file->nelements);
  int nwords = atom->count_words(line);
  if (nwords != file->nelements + 1)
    error->all("Incorrect element names in EAM potential file");

  // words[] receives nelements tokens plus the terminating NULL
  char **words = new char*[file->nelements+1];
  nwords = 0;
  strtok(line," \t\n\r\f");                       // skip the leading count
  while ((words[nwords++] = strtok(NULL," \t\n\r\f")) != NULL) continue;

  file->elements = new char*[file->nelements];
  for (int i = 0; i < file->nelements; i++) {
    n = strlen(words[i]) + 1;
    file->elements[i] = new char[n];
    strcpy(file->elements[i],words[i]);
  }
  delete [] words;

  if (me == 0) {
    fgets(line,MAXLINE,fptr);
    sscanf(line,"%d %lg %d %lg %lg",
           &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut);
  }

  MPI_Bcast(&file->nrho,1,MPI_INT,0,world);
  MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world);
  MPI_Bcast(&file->nr,1,MPI_INT,0,world);
  MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world);
  MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world);

  // tables are filled starting at index 1, hence the +1 sizes
  file->mass = new double[file->nelements];
  memory->create(file->frho,file->nelements,file->nrho+1,"pair:frho");
  memory->create(file->rhor,file->nelements,file->nr+1,"pair:rhor");
  memory->create(file->z2r,file->nelements,file->nelements,file->nr+1,
                 "pair:z2r");

  // grab() is presumably the inherited helper that reads N doubles from
  // the file -- confirm against PairEAM
  int i,j,tmp;
  for (i = 0; i < file->nelements; i++) {
    if (me == 0) {
      fgets(line,MAXLINE,fptr);
      sscanf(line,"%d %lg",&tmp,&file->mass[i]);
    }
    MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world);

    if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]);
    MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world);
    if (me == 0) grab(fptr,file->nr,&file->rhor[i][1]);
    MPI_Bcast(&file->rhor[i][1],file->nr,MPI_DOUBLE,0,world);
  }

  // only the lower triangle (j <= i) of z2r is read
  for (i = 0; i < file->nelements; i++)
    for (j = 0; j <= i; j++) {
      if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]);
      MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world);
    }

  // close the potential file

  if (me == 0) fclose(fptr);
}
/* ----------------------------------------------------------------------
copy read-in setfl potential to standard array format
------------------------------------------------------------------------- */
void PairEAMAlloyCuda::file2array()
{
int i,j,m,n;
int ntypes = atom->ntypes;
// set function params directly from setfl file
nrho = setfl->nrho;
nr = setfl->nr;
drho = setfl->drho;
dr = setfl->dr;
// ------------------------------------------------------------------
// setup frho arrays
// ------------------------------------------------------------------
// allocate frho arrays
// nfrho = # of setfl elements + 1 for zero array
nfrho = setfl->nelements + 1;
memory->destroy(frho);
memory->create(frho,nfrho,nrho+1,"pair:frho");
// copy each element's frho to global frho
for (i = 0; i < setfl->nelements; i++)
for (m = 1; m <= nrho; m++) frho[i][m] = setfl->frho[i][m];
// add extra frho of zeroes for non-EAM types to point to (pair hybrid)
// this is necessary b/c fp is still computed for non-EAM atoms
for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0;
// type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
// if atom type doesn't point to element (non-EAM atom in pair hybrid)
// then map it to last frho array of zeroes
for (i = 1; i <= ntypes; i++)
if (map[i] >= 0) type2frho[i] = map[i];
else type2frho[i] = nfrho-1;
// ------------------------------------------------------------------
// setup rhor arrays
// ------------------------------------------------------------------
// allocate rhor arrays
// nrhor = # of setfl elements
nrhor = setfl->nelements;
memory->destroy(rhor);
memory->create(rhor,nrhor,nr+1,"pair:rhor");
// copy each element's rhor to global rhor
for (i = 0; i < setfl->nelements; i++)
for (m = 1; m <= nr; m++) rhor[i][m] = setfl->rhor[i][m];
// type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to
// for setfl files, I,J mapping only depends on I
// OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
for (i = 1; i <= ntypes; i++)
for (j = 1; j <= ntypes; j++)
type2rhor[i][j] = map[i];
// ------------------------------------------------------------------
// setup z2r arrays
// ------------------------------------------------------------------
// allocate z2r arrays
// nz2r = N*(N+1)/2 where N = # of setfl elements
nz2r = setfl->nelements * (setfl->nelements+1) / 2;
memory->destroy(z2r);
memory->create(z2r,nz2r,nr+1,"pair:z2r");
// copy each element pair z2r to global z2r, only for I >= J
n = 0;
for (i = 0; i < setfl->nelements; i++)
for (j = 0; j <= i; j++) {
for (m = 1; m <= nr; m++) z2r[n][m] = setfl->z2r[i][j][m];
n++;
}
// type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to
// set of z2r arrays only fill lower triangular Nelement matrix
// value = n = sum over rows of lower-triangular matrix until reach irow,icol
// swap indices when irow < icol to stay lower triangular
// if map = -1 (non-EAM atom in pair hybrid):
// type2z2r is not used by non-opt
// but set type2z2r to 0 since accessed by opt
int irow,icol;
for (i = 1; i <= ntypes; i++) {
for (j = 1; j <= ntypes; j++) {
irow = map[i];
icol = map[j];
if (irow == -1 || icol == -1) {
type2z2r[i][j] = 0;
continue;
}
if (irow < icol) {
irow = map[j];
icol = map[i];
}
n = 0;
for (m = 0; m < irow; m++) n += m + 1;
n += icol;
type2z2r[i][j] = n;
}
}
}

View File

@ -0,0 +1,44 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(eam/alloy/cuda,PairEAMAlloyCuda)
#else
#ifndef LMP_PAIR_EAM_CUDA_ALLOY_H
#define LMP_PAIR_EAM_CUDA_ALLOY_H
#include "pair_eam_cuda.h"
namespace LAMMPS_NS {
// use virtual public since this class is parent in multiple inheritance
// Pair style "eam/alloy/cuda": PairEAMCuda that takes its tables from a
// multi-element DYNAMO setfl file.
class PairEAMAlloyCuda : virtual public PairEAMCuda {
public:
PairEAMAlloyCuda(class LAMMPS *);
virtual ~PairEAMAlloyCuda() {}
// parses "* * file elem1 ... elemN", reads the file and fills map/setflag
void coeff(int, char **);
protected:
// PairEAMCuda declares a member with the same name; this one hides it and
// is assigned from lmp->cuda in the constructor
class Cuda *cuda;
// reads the setfl file on rank 0 and broadcasts its contents
void read_file(char *);
// copies the setfl tables into frho/rhor/z2r and builds the type2* maps
void file2array();
};
}
#endif
#endif

View File

@ -0,0 +1,239 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Paul Crozier (SNL)
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_eam_cuda.h"
#include "pair_eam_cuda_cu.h"
#include "pair_virial_compute_cu.h"
#include "cuda_data.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "cuda_neigh_list.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* ---------------------------------------------------------------------- */
PairEAMCuda::PairEAMCuda(LAMMPS *lmp) : PairEAM(lmp)
{
  // A /cuda style is unusable without the Cuda object on the LAMMPS instance.
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  allocated2 = false;

  // pair flags in the shared data, set before setSystemParams() is called
  cuda->shared_data.pair.cudable_force = 1;
  cuda->shared_data.pair.override_block_per_atom = 0;
  cuda->setSystemParams();

  // no device-side buffers exist yet; compute() and init_style() create them
  cu_rho = NULL;
  cu_fp = NULL;
  cu_frho_spline = NULL;
  cu_z2r_spline = NULL;
  cu_rhor_spline = NULL;
}
/* ----------------------------------------------------------------------
remember pointer to arrays in cuda shared data
------------------------------------------------------------------------- */
void PairEAMCuda::allocate()
{
if(! allocated) PairEAM::allocate();
cuda->shared_data.pair.cutsq = cutsq;
cuda->shared_data.pair.cut_global = (F_FLOAT) cutforcesq;
}
/* ---------------------------------------------------------------------- */
// Runs the EAM force computation through the two CUDA routines
// Cuda_PairEAM1Cuda and Cuda_PairEAM2Cuda, with a forward pair
// communication between them.
// eflag / vflag: energy / virial flags, forwarded to ev_setup() and to
// both CUDA routines.
void PairEAMCuda::compute(int eflag, int vflag)
{
// refresh the pair settings in the shared data on every call
cuda->shared_data.pair.cut_global = (F_FLOAT) cutforcesq;
cuda->shared_data.pair.use_block_per_atom = 0;
cuda->shared_data.pair.collect_forces_later = 0;
// grow rho and fp, and rebuild their cCudaData wrappers, when the atom
// arrays have outgrown the capacity recorded in nmax
if (atom->nmax > nmax) {
memory->destroy(rho);
memory->destroy(fp);
nmax = atom->nmax;
memory->create(rho,nmax,"pair:rho");
memory->create(fp,nmax,"pair:fp");
delete cu_rho;
delete cu_fp;
cu_rho = new cCudaData<double, F_FLOAT, x> (rho, atom->nmax);
cu_fp = new cCudaData<double, F_FLOAT, x> (fp, atom->nmax);
// NOTE(review): this is the only visible call that hands the spline and
// rho/fp device pointers to the CUDA side. init_style() recreates the
// spline buffers; if that happens without nmax growing, the pointers
// passed here earlier may be stale -- confirm.
Cuda_PairEAMCuda_Init(&cuda->shared_data,rdr,rdrho,nfrho,nrhor,nr,nrho,nz2r,
cu_frho_spline->dev_data(),cu_rhor_spline->dev_data(),cu_z2r_spline->dev_data(),
cu_rho->dev_data(),cu_fp->dev_data(),type2frho,type2z2r,type2rhor);
}
if(eflag || vflag) ev_setup(eflag,vflag);
// upload()/download() presumably copy host -> device / device -> host -- confirm
if(eflag) cuda->cu_eng_vdwl->upload();
if(vflag) cuda->cu_virial->upload();
// first CUDA routine, then forward pair communication (which goes through
// pack_comm/unpack_comm of this class), then the second CUDA routine
Cuda_PairEAM1Cuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag,eflag_atom,vflag_atom);
comm->forward_comm_pair(this);
Cuda_PairEAM2Cuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag,eflag_atom,vflag_atom);
if(eflag) cuda->cu_eng_vdwl->download();
if(vflag) cuda->cu_virial->download();
}
/* ---------------------------------------------------------------------- */
void PairEAMCuda::settings(int narg, char **arg)
{
PairEAM::settings(narg, arg);
cuda->shared_data.pair.cut_global = (F_FLOAT) cutforcesq;
}
/* ---------------------------------------------------------------------- */
void PairEAMCuda::coeff(int narg, char **arg)
{
PairEAM::coeff(narg, arg);
allocate();
}
// Converts the potential tables to spline form, requests a full cudable
// neighbor list, and (re)creates and uploads the cCudaData wrappers for
// the three spline tables.
void PairEAMCuda::init_style()
{
MYDBG(printf("# CUDA PairEAMCuda::init_style start\n"); )
// request regular or rRESPA neighbor lists
// (the two calls below first build the tables; the request follows them)
// NOTE(review): file2array() presumably dispatches to the alloy/fs
// override when one exists -- confirm that it is virtual in PairEAM
file2array();
array2spline();
int irequest;
irequest = neighbor->request(this);
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->cudable = 1;
// drop wrappers from a previous call (NULL after construction)
delete cu_rhor_spline;
delete cu_z2r_spline;
delete cu_frho_spline;
// each wrapper is sized (table count) x (points + 1) x EAM_COEFF_LENGTH,
// so the host tables built in array2spline() must provide that many
// coefficients per point
cu_rhor_spline = new cCudaData<double, F_FLOAT, xyz>((double*)rhor_spline,nrhor,nr+1,EAM_COEFF_LENGTH);
cu_z2r_spline = new cCudaData<double, F_FLOAT, xyz>((double*)z2r_spline,nz2r,nr+1,EAM_COEFF_LENGTH);
cu_frho_spline = new cCudaData<double, F_FLOAT, xyz>((double*)frho_spline,nfrho,nrho+1,EAM_COEFF_LENGTH);
cu_rhor_spline->upload();
cu_z2r_spline->upload();
cu_frho_spline->upload();
MYDBG(printf("# CUDA PairEAMCuda::init_style end\n"); )
}
void PairEAMCuda::init_list(int id, NeighList *ptr)
{
  MYDBG(printf("# CUDA PairEAMCuda::init_list\n");)

  PairEAM::init_list(id, ptr);

#ifndef CUDA_USE_BINNING
  // Only the verlet list (id 0) is handled for now, not respa lists.
  // Neighbor::init() describes the logic behind the LAMMPS list ids.
  if (id == 0) {
    cuda_neigh_list = cuda->registerNeighborList(ptr);
  }
#endif

  MYDBG(printf("# CUDA PairEAMCuda::init_list end\n");)
}
void PairEAMCuda::array2spline()
{
rdr = 1.0/dr;
rdrho = 1.0/drho;
memory->destroy(frho_spline);
memory->destroy(rhor_spline);
memory->destroy(z2r_spline);
memory->create(frho_spline,nfrho,nrho+1,7,"pair:frho");
memory->create(rhor_spline,nrhor,nr+1,7,"pair:rhor");
memory->create(z2r_spline,nz2r,nr+1,7,"pair:z2r");
for (int i = 0; i < nfrho; i++){
interpolate(nrho,drho,frho[i],frho_spline[i]);
for(int j=0;j<nrho+1;j++)
frho_spline[i][j][7]=frho_spline[i][j][3];
}
for (int i = 0; i < nrhor; i++){
interpolate(nr,dr,rhor[i],rhor_spline[i]);
for(int j=0;j<nr+1;j++)
rhor_spline[i][j][7]=rhor_spline[i][j][3];
}
for (int i = 0; i < nz2r; i++){
interpolate(nr,dr,z2r[i],z2r_spline[i]);
for(int j=0;j<nr+1;j++)
z2r_spline[i][j][7]=z2r_spline[i][j][3];
}
}
/* ---------------------------------------------------------------------- */
int PairEAMCuda::pack_comm(int n, int *iswap, double *buf, int pbc_flag, int *pbc)
{
  // Only the first int behind iswap is handed on; pbc_flag and pbc are
  // not used. NOTE(review): the CUDA comm code presumably passes a
  // pointer to a single swap index here -- confirm against the caller.
  Cuda_PairEAMCuda_PackComm(&cuda->shared_data, n, iswap[0], buf);

  // The result is 1 regardless of the width of F_FLOAT.
  return 1;
}
/* ---------------------------------------------------------------------- */
void PairEAMCuda::unpack_comm(int n, int first, double *buf)
{
  // The CUDA routine receives the device pointer of fp together with the
  // communication buffer and the (n, first) range description.
  Cuda_PairEAMCuda_UnpackComm(&cuda->shared_data, n, first, buf,
                              cu_fp->dev_data());
}

View File

@ -0,0 +1,78 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(eam/cuda,PairEAMCuda)
#else
#ifndef PAIR_EAM_CUDA_H
#define PAIR_EAM_CUDA_H
#include "cuda_data.h"
#include "pair_eam.h"
namespace LAMMPS_NS {
// Pair style "eam/cuda": PairEAM variant whose compute() runs the force
// calculation through Cuda_PairEAM1Cuda / Cuda_PairEAM2Cuda.
class PairEAMCuda : public PairEAM
{
public:
PairEAMCuda(class LAMMPS *);
// grows rho/fp when needed, runs both CUDA routines with a forward pair
// communication between them, and transfers energy/virial accumulators
void compute(int, int);
// base-class settings(), then stores cutforcesq as pair.cut_global
void settings(int, char **);
// base-class coeff(), then allocate()
void coeff(int, char **);
// base-class init_list(); registers list id 0 with the Cuda object
// unless CUDA_USE_BINNING is defined
void init_list(int, class NeighList *);
// builds the spline tables, requests a full cudable neighbor list and
// uploads the spline tables
void init_style();
// fills frho_spline / rhor_spline / z2r_spline from frho / rhor / z2r
void array2spline();
// forward-communication hooks; both delegate to Cuda_PairEAMCuda_* routines
int pack_comm(int n, int *iswap, double *buf, int pbc_flag, int *pbc);
void unpack_comm(int n, int first, double *buf);
protected:
// Cuda object that owns shared_data and the global cCudaData buffers
class Cuda *cuda;
void allocate();
// set to false in the constructor; not used elsewhere in the visible code
bool allocated2;
// set by init_list(); its sneighlist member is passed to the CUDA routines
class CudaNeighList* cuda_neigh_list;
// cCudaData wrappers around the host rho and fp arrays, created in compute()
cCudaData<double, F_FLOAT, x>* cu_rho;
cCudaData<double, F_FLOAT, x>* cu_fp;
// cCudaData wrappers around the spline tables, created and uploaded in init_style()
cCudaData<double, F_FLOAT, xyz>* cu_rhor_spline;
cCudaData<double, F_FLOAT, xyz>* cu_z2r_spline;
cCudaData<double, F_FLOAT, xyz>* cu_frho_spline;
};
}
#endif
#endif

View File

@ -0,0 +1,335 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Tim Lau (MIT)
------------------------------------------------------------------------- */
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_eam_fs_cuda.h"
#include "atom.h"
#include "comm.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
#define MAXLINE 1024
/* ---------------------------------------------------------------------- */
PairEAMFSCuda::PairEAMFSCuda(LAMMPS *lmp) : PairEAMCuda(lmp)
{
  // A /cuda style is unusable without the Cuda object on the LAMMPS instance.
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  one_coeff = 1;
}
/* ----------------------------------------------------------------------
set coeffs for one or more type pairs
read EAM Finnis-Sinclair file
------------------------------------------------------------------------- */
void PairEAMFSCuda::coeff(int narg, char **arg)
{
  if (!allocated) allocate();

  // expected arguments: * * <file> followed by one name per atom type
  if (narg != 3 + atom->ntypes)
    error->all("Incorrect args for pair coefficients");

  // insure I,J args are * *
  const bool both_wildcards =
    (strcmp(arg[0],"*") == 0) && (strcmp(arg[1],"*") == 0);
  if (!both_wildcards)
    error->all("Incorrect args for pair coefficients");

  // discard a previously read Finnis-Sinclair file before reading the new one
  if (fs) {
    for (int e = 0; e < fs->nelements; e++) delete [] fs->elements[e];
    delete [] fs->elements;
    delete [] fs->mass;
    memory->destroy(fs->frho);
    memory->destroy(fs->rhor);
    memory->destroy(fs->z2r);
    delete fs;
  }
  fs = new Fs();
  read_file(arg[2]);

  // map each atom type to an element of the potential file
  // map[type] = element index, or -1 when the argument is NULL
  for (int iarg = 3; iarg < narg; iarg++) {
    const int itype = iarg - 2;

    if (strcmp(arg[iarg],"NULL") == 0) {
      map[itype] = -1;
      continue;
    }

    int ielem = 0;
    while (ielem < fs->nelements &&
           strcmp(arg[iarg],fs->elements[ielem]) != 0) ielem++;

    if (ielem < fs->nelements) map[itype] = ielem;
    else error->all("No matching element in EAM potential file");
  }

  // clear setflag since coeff() called once with I,J = * *
  const int ntypes = atom->ntypes;
  for (int i = 1; i <= ntypes; i++)
    for (int j = i; j <= ntypes; j++)
      setflag[i][j] = 0;

  // flag every type pair whose members are both mapped to an element
  // and set the mass of a type from the file when i == j
  int count = 0;
  for (int i = 1; i <= ntypes; i++) {
    for (int j = i; j <= ntypes; j++) {
      if (map[i] < 0 || map[j] < 0) continue;
      setflag[i][j] = 1;
      if (i == j) atom->set_mass(i,fs->mass[map[i]]);
      count++;
    }
  }

  if (count == 0) error->all("Incorrect args for pair coefficients");
}
/* ----------------------------------------------------------------------
read a multi-element EAM Finnis-Sinclair (fs) potential file
------------------------------------------------------------------------- */
/* layout consumed below:
     lines 1-3 : skipped
     line 4    : nelements followed by one name per element
     line 5    : nrho drho nr dr cut
     per element i: one line "<int> <mass>", then nrho frho values,
                    then nr rhor values for every element j
     finally   : nr z2r values for every element pair (i,j) with j <= i
   rank 0 does all file I/O; every value is broadcast to the other ranks
   filename = path of the potential file (only opened on rank 0)        */
void PairEAMFSCuda::read_file(char *filename)
{
  Fs *file = fs;

  // open potential file

  int me = comm->me;
  FILE *fptr = NULL;
  char line[MAXLINE];

  if (me == 0) {
    fptr = fopen(filename,"r");
    if (fptr == NULL) {
      char str[128];
      // bounded formatting: a long file name must not overrun str
      snprintf(str,sizeof(str),"Cannot open EAM potential file %s",filename);
      error->one(str);
    }
  }

  // read and broadcast header
  // extract element names from nelements line

  int n;
  if (me == 0) {
    fgets(line,MAXLINE,fptr);
    fgets(line,MAXLINE,fptr);
    fgets(line,MAXLINE,fptr);
    fgets(line,MAXLINE,fptr);
    n = strlen(line) + 1;
  }
  MPI_Bcast(&n,1,MPI_INT,0,world);
  MPI_Bcast(line,n,MPI_CHAR,0,world);

  sscanf(line,"%d",&file->nelements);
  int nwords = atom->count_words(line);
  if (nwords != file->nelements + 1)
    error->all("Incorrect element names in EAM potential file");

  // words[] receives nelements tokens plus the terminating NULL
  char **words = new char*[file->nelements+1];
  nwords = 0;
  strtok(line," \t\n\r\f");                       // skip the leading count
  while ((words[nwords++] = strtok(NULL," \t\n\r\f")) != NULL) continue;

  file->elements = new char*[file->nelements];
  for (int i = 0; i < file->nelements; i++) {
    n = strlen(words[i]) + 1;
    file->elements[i] = new char[n];
    strcpy(file->elements[i],words[i]);
  }
  delete [] words;

  if (me == 0) {
    fgets(line,MAXLINE,fptr);
    sscanf(line,"%d %lg %d %lg %lg",
           &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut);
  }

  MPI_Bcast(&file->nrho,1,MPI_INT,0,world);
  MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world);
  MPI_Bcast(&file->nr,1,MPI_INT,0,world);
  MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world);
  MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world);

  // tables are filled starting at index 1, hence the +1 sizes
  // rhor holds a full nelements x nelements set of tables
  file->mass = new double[file->nelements];
  memory->create(file->frho,file->nelements,file->nrho+1,
                 "pair:frho");
  memory->create(file->rhor,file->nelements,file->nelements,
                 file->nr+1,"pair:rhor");
  memory->create(file->z2r,file->nelements,file->nelements,
                 file->nr+1,"pair:z2r");

  // grab() is presumably the inherited helper that reads N doubles from
  // the file -- confirm against PairEAM
  int i,j,tmp;
  for (i = 0; i < file->nelements; i++) {
    if (me == 0) {
      fgets(line,MAXLINE,fptr);
      sscanf(line,"%d %lg",&tmp,&file->mass[i]);
    }
    MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world);

    if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]);
    MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world);

    for (j = 0; j < file->nelements; j++) {
      if (me == 0) grab(fptr,file->nr,&file->rhor[i][j][1]);
      MPI_Bcast(&file->rhor[i][j][1],file->nr,MPI_DOUBLE,0,world);
    }
  }

  // only the lower triangle (j <= i) of z2r is read
  for (i = 0; i < file->nelements; i++)
    for (j = 0; j <= i; j++) {
      if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]);
      MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world);
    }

  // close the potential file

  if (me == 0) fclose(fptr);
}
/* ----------------------------------------------------------------------
copy read-in fs potential to standard array format
------------------------------------------------------------------------- */
void PairEAMFSCuda::file2array()
{
int i,j,m,n;
int ntypes = atom->ntypes;
// set function params directly from fs file
nrho = fs->nrho;
nr = fs->nr;
drho = fs->drho;
dr = fs->dr;
// ------------------------------------------------------------------
// setup frho arrays
// ------------------------------------------------------------------
// allocate frho arrays
// nfrho = # of fs elements + 1 for zero array
nfrho = fs->nelements + 1;
memory->destroy(frho);
memory->create(frho,nfrho,nrho+1,"pair:frho");
// copy each element's frho to global frho
for (i = 0; i < fs->nelements; i++)
for (m = 1; m <= nrho; m++) frho[i][m] = fs->frho[i][m];
// add extra frho of zeroes for non-EAM types to point to (pair hybrid)
// this is necessary b/c fp is still computed for non-EAM atoms
for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0;
// type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
// if atom type doesn't point to element (non-EAM atom in pair hybrid)
// then map it to last frho array of zeroes
for (i = 1; i <= ntypes; i++)
if (map[i] >= 0) type2frho[i] = map[i];
else type2frho[i] = nfrho-1;
// ------------------------------------------------------------------
// setup rhor arrays
// ------------------------------------------------------------------
// allocate rhor arrays
// nrhor = square of # of fs elements
nrhor = fs->nelements * fs->nelements;
memory->destroy(rhor);
memory->create(rhor,nrhor,nr+1,"pair:rhor");
// copy each element pair rhor to global rhor
n = 0;
for (i = 0; i < fs->nelements; i++)
for (j = 0; j < fs->nelements; j++) {
for (m = 1; m <= nr; m++) rhor[n][m] = fs->rhor[i][j][m];
n++;
}
// type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to
// for fs files, there is a full NxN set of rhor arrays
// OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
for (i = 1; i <= ntypes; i++)
for (j = 1; j <= ntypes; j++)
type2rhor[i][j] = map[i] * fs->nelements + map[j];
// ------------------------------------------------------------------
// setup z2r arrays
// ------------------------------------------------------------------
// allocate z2r arrays
// nz2r = N*(N+1)/2 where N = # of fs elements
nz2r = fs->nelements * (fs->nelements+1) / 2;
memory->destroy(z2r);
memory->create(z2r,nz2r,nr+1,"pair:z2r");
// copy each element pair z2r to global z2r, only for I >= J
n = 0;
for (i = 0; i < fs->nelements; i++)
for (j = 0; j <= i; j++) {
for (m = 1; m <= nr; m++) z2r[n][m] = fs->z2r[i][j][m];
n++;
}
// type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to
// set of z2r arrays only fill lower triangular Nelement matrix
// value = n = sum over rows of lower-triangular matrix until reach irow,icol
// swap indices when irow < icol to stay lower triangular
// if map = -1 (non-EAM atom in pair hybrid):
// type2z2r is not used by non-opt
// but set type2z2r to 0 since accessed by opt
int irow,icol;
for (i = 1; i <= ntypes; i++) {
for (j = 1; j <= ntypes; j++) {
irow = map[i];
icol = map[j];
if (irow == -1 || icol == -1) {
type2z2r[i][j] = 0;
continue;
}
if (irow < icol) {
irow = map[j];
icol = map[i];
}
n = 0;
for (m = 0; m < irow; m++) n += m + 1;
n += icol;
type2z2r[i][j] = n;
}
}
}

View File

@ -0,0 +1,44 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(eam/fs/cuda,PairEAMFSCuda)
#else
#ifndef LMP_PAIR_EAM_FS_CUDA_H
#define LMP_PAIR_EAM_FS_CUDA_H
#include "pair_eam_cuda.h"
namespace LAMMPS_NS {
// use virtual public since this class is parent in multiple inheritance
// Declaration of the class registered above as pair style "eam/fs/cuda".
// It derives virtually from PairEAMCuda and declares its own coeff(),
// read_file() and file2array().
class PairEAMFSCuda : virtual public PairEAMCuda {
public:
PairEAMFSCuda(class LAMMPS *);
// nothing to release at this level
virtual ~PairEAMFSCuda() {}
void coeff(int, char **);
protected:
// pointer to the package's Cuda object
// NOTE(review): presumably taken from LAMMPS::cuda in the constructor,
// as the other /cuda pair styles do - confirm in the .cpp file
class Cuda *cuda;
void read_file(char *);
void file2array();
};
}
#endif
#endif

View File

@ -0,0 +1,247 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Paul Crozier (SNL)
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_gran_hooke_cuda.h"
#include "pair_gran_hooke_cuda_cu.h"
#include "cuda_data.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "modify.h"
#include "fix_pour.h"
#include "cuda_neigh_list.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* ---------------------------------------------------------------------- */
PairGranHookeCuda::PairGranHookeCuda(LAMMPS *lmp) : PairGranHooke(lmp)
{
  // a /cuda pair style is only usable when LAMMPS owns a Cuda object
  cuda = lmp->cuda;
  if (!cuda)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  // allocate() has not yet filled the cuda shared pair data
  allocated2 = false;

  // mark the pair force as cudable in the shared data and refresh
  // the system parameters of the Cuda object
  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
/* ----------------------------------------------------------------------
remember pointer to arrays in cuda shared data
------------------------------------------------------------------------- */
void PairGranHookeCuda::allocate()
{
if(! allocated) PairGranHooke::allocate();
if(! allocated2)
{
allocated2 = true;
int n = atom->ntypes;
cuda->shared_data.pair.cutsq = cutsq;
memory->create(cuda->shared_data.pair.coeff1,n+1,n+1,
"pair:cuda_coeff1");
memory->create(cuda->shared_data.pair.coeff2,
n+1,n+1,"pair:cuda_coeff2");
cuda->shared_data.pair.coeff1[0][0]=kn;
cuda->shared_data.pair.coeff1[0][1]=kt;
cuda->shared_data.pair.coeff1[1][0]=gamman;
cuda->shared_data.pair.coeff1[1][1]=gammat;
cuda->shared_data.pair.coeff2[0][0]=xmu;
cuda->shared_data.pair.coeff2[0][1]=dampflag;
}
}
/* ---------------------------------------------------------------------- */
void PairGranHookeCuda::compute(int eflag, int vflag)
{
  cuda->shared_data.pair.use_block_per_atom = 0;

  //cuda->cu_debugdata->memset_device(0);

  if (eflag || vflag) ev_setup(eflag,vflag);

  // upload() the global energy / virial buffers before the Cuda_ call
  if (eflag) cuda->cu_eng_vdwl->upload();
  if (vflag) cuda->cu_virial->upload();

  Cuda_PairGranHookeCuda(&cuda->shared_data, &cuda_neigh_list->sneighlist,
                         eflag, vflag, eflag_atom, vflag_atom);

  // download() them again here, unless collect_forces_later is set
  if (!cuda->shared_data.pair.collect_forces_later) {
    if (eflag) cuda->cu_eng_vdwl->download();
    if (vflag) cuda->cu_virial->download();
  }

  //cuda->cu_debugdata->download();
  //printf("%lf %lf %lf %lf %lf %lf\n",1.0e-6*cuda->debugdata[0],1.0e-6*cuda->debugdata[1],1.0e-6*cuda->debugdata[2],1.0e-6*cuda->debugdata[3],1.0e-6*cuda->debugdata[4],1.0e-6*cuda->debugdata[5]);
}
/* ---------------------------------------------------------------------- */
void PairGranHookeCuda::settings(int narg, char **arg)
{
  // nothing cuda-specific to record: forward to the base class unchanged
  PairGranHooke::settings(narg,arg);
}
/* ---------------------------------------------------------------------- */
void PairGranHookeCuda::coeff(int narg, char **arg)
{
  // the base class processes the arguments first ...
  PairGranHooke::coeff(narg,arg);

  // ... then the cuda shared pair data is set up (a no-op after the first call)
  allocate();
}
void PairGranHookeCuda::init_style()
{
int i;
MYDBG(printf("# CUDA PairGranHookeCuda::init_style start\n"); )
// request regular or rRESPA neighbor lists
int irequest;
if (update->whichflag == 0 && strcmp(update->integrate_style,"respa") == 0) {
}
else
{
irequest = neighbor->request(this);
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->gran = 1;
neighbor->requests[irequest]->cudable = 1;
//neighbor->style=0; //0=NSQ neighboring
}
if (!atom->radius_flag || !atom->omega_flag || !atom->torque_flag)
error->all("Pair granular requires atom attributes radius, omega, torque");
if (comm->ghost_velocity == 0)
error->all("Pair granular requires ghost atoms store velocity");
// need a half neigh list and optionally a granular history neigh list
dt = update->dt;
// check for Fix freeze and set freeze_group_bit
for (i = 0; i < modify->nfix; i++)
if (strcmp(modify->fix[i]->style,"freeze") == 0) break;
if (i < modify->nfix) freeze_group_bit = modify->fix[i]->groupbit;
else freeze_group_bit = 0;
cuda->shared_data.pair.freeze_group_bit=freeze_group_bit;
// check for Fix pour and set pour_type and pour_maxdiam
int pour_type = 0;
double pour_maxrad = 0.0;
for (i = 0; i < modify->nfix; i++)
if (strcmp(modify->fix[i]->style,"pour") == 0) break;
if (i < modify->nfix) {
pour_type = ((FixPour *) modify->fix[i])->ntype;
pour_maxrad = ((FixPour *) modify->fix[i])->radius_hi;
}
// set maxrad_dynamic and maxrad_frozen for each type
// include future Fix pour particles as dynamic
for (i = 1; i <= atom->ntypes; i++)
onerad_dynamic[i] = onerad_frozen[i] = 0.0;
if (pour_type) onerad_dynamic[pour_type] = pour_maxrad;
double *radius = atom->radius;
int *mask = atom->mask;
int *type = atom->type;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++){
if (mask[i] & freeze_group_bit)
onerad_frozen[type[i]] = MAX(onerad_frozen[type[i]],radius[i]);
else
onerad_dynamic[type[i]] = MAX(onerad_dynamic[type[i]],radius[i]);
}
MPI_Allreduce(&onerad_dynamic[1],&maxrad_dynamic[1],atom->ntypes,
MPI_DOUBLE,MPI_MAX,world);
MPI_Allreduce(&onerad_frozen[1],&maxrad_frozen[1],atom->ntypes,
MPI_DOUBLE,MPI_MAX,world);
MYDBG(printf("# CUDA PairGranHookeCuda::init_style end\n"); )
}
/* ----------------------------------------------------------------------
   pass the list on to the base class and, unless CUDA_USE_BINNING is
   defined, register the list with id 0 with the Cuda object
------------------------------------------------------------------------- */
void PairGranHookeCuda::init_list(int id, NeighList *ptr)
{
  MYDBG(printf("# CUDA PairGranHookeCuda::init_list\n");)

  PairGranHooke::init_list(id, ptr);

#ifndef CUDA_USE_BINNING
  // only the verlet list (id 0) can be handled right now, not respa;
  // see Neighbor::init() for details on lammps lists' logic
  const bool verlet_list = (id == 0);
  if (verlet_list) cuda_neigh_list = cuda->registerNeighborList(ptr);
#endif

  MYDBG(printf("# CUDA PairGranHookeCuda::init_list end\n");)
}
void PairGranHookeCuda::ev_setup(int eflag, int vflag)
{
int maxeatomold=maxeatom;
PairGranHooke::ev_setup(eflag,vflag);
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
}

View File

@ -0,0 +1,57 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(gran/hooke/cuda,PairGranHookeCuda)
#else
#ifndef PAIR_GRAN_HOOKE_CUDA_H
#define PAIR_GRAN_HOOKE_CUDA_H
#include "pair_gran_hooke.h"
namespace LAMMPS_NS {
// Declaration of the class registered above as pair style "gran/hooke/cuda".
// It derives from PairGranHooke and routes compute() through
// Cuda_PairGranHookeCuda().
class PairGranHookeCuda : public PairGranHooke
{
public:
PairGranHookeCuda(class LAMMPS *);
void compute(int, int);
void settings(int, char **);
void coeff(int, char **);
void init_list(int, class NeighList *);
void init_style();
void ev_setup(int eflag, int vflag);
protected:
// the Cuda object taken from LAMMPS::cuda in the constructor
class Cuda *cuda;
void allocate();
// true once allocate() has filled the cuda shared pair data
bool allocated2;
// set in init_list() for list id 0 (only when CUDA_USE_BINNING is not
// defined); not initialized by the constructor
class CudaNeighList* cuda_neigh_list;
};
}
#endif
#endif

View File

@ -0,0 +1,184 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Paul Crozier (SNL)
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_lj96_cut_cuda.h"
#include "pair_lj96_cut_cuda_cu.h"
#include "cuda_data.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "cuda_neigh_list.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* ---------------------------------------------------------------------- */
PairLJ96CutCuda::PairLJ96CutCuda(LAMMPS *lmp) : PairLJ96Cut(lmp)
{
  // a /cuda pair style is only usable when LAMMPS owns a Cuda object
  cuda = lmp->cuda;
  if (!cuda)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  // allocate() has not yet filled the cuda shared pair data
  allocated2 = false;

  // mark the pair force as cudable in the shared data and refresh
  // the system parameters of the Cuda object
  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
/* ----------------------------------------------------------------------
   make sure the base-class arrays exist, then (once only) store the
   pointers to them in the cuda shared pair data
------------------------------------------------------------------------- */
void PairLJ96CutCuda::allocate()
{
  if (!allocated) PairLJ96Cut::allocate();

  // the pointers only need to be recorded a single time
  if (allocated2) return;
  allocated2 = true;

  // cutoff, offset and the four LJ coefficient tables
  cuda->shared_data.pair.cut = cut;
  cuda->shared_data.pair.offset = offset;
  cuda->shared_data.pair.coeff1 = lj1;
  cuda->shared_data.pair.coeff2 = lj2;
  cuda->shared_data.pair.coeff3 = lj3;
  cuda->shared_data.pair.coeff4 = lj4;

  // special-bond weights held by Force
  cuda->shared_data.pair.special_lj = force->special_lj;
  cuda->shared_data.pair.special_coul = force->special_coul;
}
/* ---------------------------------------------------------------------- */
void PairLJ96CutCuda::compute(int eflag, int vflag)
{
  if (eflag || vflag) ev_setup(eflag,vflag);

  // upload() the global energy / virial buffers before the Cuda_ call
  if (eflag) cuda->cu_eng_vdwl->upload();
  if (vflag) cuda->cu_virial->upload();

  Cuda_PairLJ96CutCuda(&cuda->shared_data, &cuda_neigh_list->sneighlist,
                       eflag, vflag, eflag_atom, vflag_atom);

  // download() them again here, unless collect_forces_later is set
  if (!cuda->shared_data.pair.collect_forces_later) {
    if (eflag) cuda->cu_eng_vdwl->download();
    if (vflag) cuda->cu_virial->download();
  }
}
/* ---------------------------------------------------------------------- */
void PairLJ96CutCuda::settings(int narg, char **arg)
{
PairLJ96Cut::settings(narg, arg);
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_global;
}
/* ---------------------------------------------------------------------- */
void PairLJ96CutCuda::coeff(int narg, char **arg)
{
  // the base class processes the arguments first ...
  PairLJ96Cut::coeff(narg,arg);

  // ... then the cuda shared pair data is set up (a no-op after the first call)
  allocate();
}
void PairLJ96CutCuda::init_style()
{
MYDBG(printf("# CUDA PairLJ96CutCuda::init_style start\n"); )
// request regular or rRESPA neighbor lists
int irequest;
if (update->whichflag == 0 && strcmp(update->integrate_style,"respa") == 0) {
}
else
{
irequest = neighbor->request(this);
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->cudable = 1;
//neighbor->style=0; //0=NSQ neighboring
}
cut_respa = NULL;
MYDBG(printf("# CUDA PairLJ96CutCuda::init_style end\n"); )
}
/* ----------------------------------------------------------------------
   pass the list on to the base class and, unless CUDA_USE_BINNING is
   defined, register the list with id 0 with the Cuda object
------------------------------------------------------------------------- */
void PairLJ96CutCuda::init_list(int id, NeighList *ptr)
{
  MYDBG(printf("# CUDA PairLJ96CutCuda::init_list\n");)

  PairLJ96Cut::init_list(id, ptr);

#ifndef CUDA_USE_BINNING
  // only the verlet list (id 0) can be handled right now, not respa;
  // see Neighbor::init() for details on lammps lists' logic
  const bool verlet_list = (id == 0);
  if (verlet_list) cuda_neigh_list = cuda->registerNeighborList(ptr);
#endif

  MYDBG(printf("# CUDA PairLJ96CutCuda::init_list end\n");)
}
void PairLJ96CutCuda::ev_setup(int eflag, int vflag)
{
int maxeatomold=maxeatom;
PairLJ96Cut::ev_setup(eflag,vflag);
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
}

View File

@ -0,0 +1,57 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lj96/cut/cuda,PairLJ96CutCuda)
#else
#ifndef PAIR_LJ96_CUT_CUDA_H
#define PAIR_LJ96_CUT_CUDA_H
#include "pair_lj96_cut.h"
namespace LAMMPS_NS {
// Declaration of the class registered above as pair style "lj96/cut/cuda".
// It derives from PairLJ96Cut and routes compute() through
// Cuda_PairLJ96CutCuda().
class PairLJ96CutCuda : public PairLJ96Cut
{
public:
PairLJ96CutCuda(class LAMMPS *);
void compute(int, int);
void settings(int, char **);
void coeff(int, char **);
void init_list(int, class NeighList *);
void init_style();
void ev_setup(int eflag, int vflag);
protected:
// the Cuda object taken from LAMMPS::cuda in the constructor
class Cuda *cuda;
void allocate();
// true once allocate() has stored the array pointers in the shared data
bool allocated2;
// set in init_list() for list id 0 (only when CUDA_USE_BINNING is not
// defined); not initialized by the constructor
class CudaNeighList* cuda_neigh_list;
};
}
#endif
#endif

View File

@ -0,0 +1,193 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
Contributing author: Paul Crozier (SNL)
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_lj_charmm_coul_charmm_cuda.h"
#include "pair_lj_charmm_coul_charmm_cuda_cu.h"
#include "cuda_data.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "kspace.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "cuda_neigh_list.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* ---------------------------------------------------------------------- */
PairLJCharmmCoulCharmmCuda::PairLJCharmmCoulCharmmCuda(LAMMPS *lmp)
  : PairLJCharmmCoulCharmm(lmp)
{
  // a /cuda pair style is only usable when LAMMPS owns a Cuda object
  cuda = lmp->cuda;
  if (!cuda)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  // allocate() has not yet filled the cuda shared pair data
  allocated2 = false;

  // pair flags in the shared data, followed by a refresh of the
  // system parameters of the Cuda object
  cuda->shared_data.pair.cudable_force = 1;
  cuda->shared_data.pair.use_block_per_atom = 0;
  cuda->setSystemParams();
}
/* ----------------------------------------------------------------------
remember pointer to arrays in cuda shared data
------------------------------------------------------------------------- */
void PairLJCharmmCoulCharmmCuda::allocate()
{
if(! allocated) PairLJCharmmCoulCharmm::allocate();
if(! allocated2)
{
allocated2 = true;
cuda->shared_data.pair.coeff1 = lj1;
cuda->shared_data.pair.coeff2 = lj2;
cuda->shared_data.pair.coeff3 = lj3;
cuda->shared_data.pair.coeff4 = lj4;
cuda->shared_data.pair.special_lj = force->special_lj;
cuda->shared_data.pair.special_coul = force->special_coul;
cu_lj1_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm, (atom->ntypes+1)*(atom->ntypes+1));
cu_lj2_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm, (atom->ntypes+1)*(atom->ntypes+1));
cu_lj3_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm, (atom->ntypes+1)*(atom->ntypes+1));
cu_lj4_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm, (atom->ntypes+1)*(atom->ntypes+1));
}
}
/* ---------------------------------------------------------------------- */
void PairLJCharmmCoulCharmmCuda::compute(int eflag, int vflag)
{
  if (eflag || vflag) ev_setup(eflag,vflag);

  // the global energy / virial buffers are only transferred here while
  // collect_forces_later is not set; the flag is re-read after the call
  if (!cuda->shared_data.pair.collect_forces_later) {
    if (eflag) {
      cuda->cu_eng_vdwl->upload();
      cuda->cu_eng_coul->upload();
    }
    if (vflag) cuda->cu_virial->upload();
  }

  Cuda_PairLJCharmmCoulCharmmCuda(&cuda->shared_data,
                                  &cuda_neigh_list->sneighlist,
                                  eflag, vflag, eflag_atom, vflag_atom,
                                  denom_lj, cut_coul_innersq, denom_coul);

  if (!cuda->shared_data.pair.collect_forces_later) {
    if (eflag) {
      cuda->cu_eng_vdwl->download();
      cuda->cu_eng_coul->download();
    }
    if (vflag) cuda->cu_virial->download();
  }
}
/* ---------------------------------------------------------------------- */
void PairLJCharmmCoulCharmmCuda::settings(int narg, char **arg)
{
PairLJCharmmCoulCharmm::settings(narg, arg);
cuda->shared_data.pair.cut_global = (X_FLOAT) cut_lj;
cuda->shared_data.pair.cut_coulsq_global = (X_FLOAT) cut_coulsq;
cuda->shared_data.pair.cut_inner_global = (F_FLOAT) cut_lj_inner;
}
/* ---------------------------------------------------------------------- */
void PairLJCharmmCoulCharmmCuda::coeff(int narg, char **arg)
{
  // the base class processes the arguments first ...
  PairLJCharmmCoulCharmm::coeff(narg,arg);

  // ... then the cuda shared pair data is set up (a no-op after the first call)
  allocate();
}
void PairLJCharmmCoulCharmmCuda::init_style()
{
if (!atom->q_flag)
error->all("Pair style lj/charmm/coul/long requires atom attribute q");
// request regular or rRESPA neighbor lists
if(atom->molecular)
{
cuda->shared_data.pair.collect_forces_later = 1;
}
int irequest;
irequest = neighbor->request(this);
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->cudable = 1;
if (cut_lj_inner >= cut_lj || cut_coul_inner >= cut_coul)
error->all("Pair inner cutoff >= Pair outer cutoff");
cut_lj_innersq = cut_lj_inner * cut_lj_inner;
cut_ljsq = cut_lj * cut_lj;
cut_coul_innersq = cut_coul_inner * cut_coul_inner;
cut_coulsq = cut_coul * cut_coul;
cut_bothsq = MAX(cut_ljsq,cut_coulsq);
denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) *
(cut_ljsq-cut_lj_innersq);
denom_coul = (cut_coulsq-cut_coul_innersq) * (cut_coulsq-cut_coul_innersq) *
(cut_coulsq-cut_coul_innersq);
cut_coulsq = cut_coul * cut_coul;
cuda->shared_data.pair.cut_coulsq_global=cut_coulsq;
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
}
/* ----------------------------------------------------------------------
   pass the list on to the base class and, unless CUDA_USE_BINNING is
   defined, register the list with id 0 with the Cuda object
------------------------------------------------------------------------- */
void PairLJCharmmCoulCharmmCuda::init_list(int id, NeighList *ptr)
{
  MYDBG(printf("# CUDA PairLJCharmmCoulCharmmCuda::init_list\n");)

  PairLJCharmmCoulCharmm::init_list(id, ptr);

#ifndef CUDA_USE_BINNING
  // only the verlet list (id 0) can be handled right now, not respa;
  // see Neighbor::init() for details on lammps lists' logic
  const bool verlet_list = (id == 0);
  if (verlet_list) cuda_neigh_list = cuda->registerNeighborList(ptr);
#endif

  MYDBG(printf("# CUDA PairLJCharmmCoulCharmmCuda::init_list end\n");)
}
void PairLJCharmmCoulCharmmCuda::ev_setup(int eflag, int vflag)
{
int maxeatomold=maxeatom;
PairLJCharmmCoulCharmm::ev_setup(eflag,vflag);
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
}

View File

@ -0,0 +1,63 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lj/charmm/coul/charmm/cuda,PairLJCharmmCoulCharmmCuda)
#else
#ifndef LMP_PAIR_LJ_CHARMM_COUL_CHARMM_CUDA_H
#define LMP_PAIR_LJ_CHARMM_COUL_CHARMM_CUDA_H
#include "pair_lj_charmm_coul_charmm.h"
#include "cuda_data.h"
namespace LAMMPS_NS {
// Declaration of the class registered above as pair style
// "lj/charmm/coul/charmm/cuda". It derives from PairLJCharmmCoulCharmm and
// routes compute() through Cuda_PairLJCharmmCoulCharmmCuda().
class PairLJCharmmCoulCharmmCuda : public PairLJCharmmCoulCharmm
{
public:
PairLJCharmmCoulCharmmCuda(class LAMMPS *);
void compute(int, int);
void settings(int, char **);
void coeff(int, char **);
void init_list(int, class NeighList *);
void init_style();
void ev_setup(int eflag, int vflag);
protected:
// the Cuda object taken from LAMMPS::cuda in the constructor
class Cuda *cuda;
void allocate();
// true once allocate() has filled the shared data and created the wrappers
bool allocated2;
// set in init_list() for list id 0 (only when CUDA_USE_BINNING is not
// defined); not initialized by the constructor
class CudaNeighList* cuda_neigh_list;
// wrappers around lj1..lj4, created with new in allocate();
// no matching delete appears in this class
cCudaData<double , F_FLOAT , x >* cu_lj1_gm;
cCudaData<double , F_FLOAT , x >* cu_lj2_gm;
cCudaData<double , F_FLOAT , x >* cu_lj3_gm;
cCudaData<double , F_FLOAT , x >* cu_lj4_gm;
};
}
#endif
#endif

View File

@ -0,0 +1,188 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
Contributing author: Paul Crozier (SNL)
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_lj_charmm_coul_charmm_implicit_cuda.h"
#include "pair_lj_charmm_coul_charmm_implicit_cuda_cu.h"
#include "cuda_data.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "kspace.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "cuda_neigh_list.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* ---------------------------------------------------------------------- */
PairLJCharmmCoulCharmmImplicitCuda::PairLJCharmmCoulCharmmImplicitCuda(LAMMPS *lmp)
  : PairLJCharmmCoulCharmmImplicit(lmp)
{
  // a /cuda pair style is only usable when LAMMPS owns a Cuda object
  cuda = lmp->cuda;
  if (!cuda)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  // allocate() has not yet filled the cuda shared pair data
  allocated2 = false;

  // pair flags in the shared data (this style always sets
  // collect_forces_later), followed by a refresh of the system
  // parameters of the Cuda object
  cuda->shared_data.pair.cudable_force = 1;
  cuda->shared_data.pair.collect_forces_later = 1;
  cuda->setSystemParams();
}
/* ----------------------------------------------------------------------
remember pointer to arrays in cuda shared data
------------------------------------------------------------------------- */
void PairLJCharmmCoulCharmmImplicitCuda::allocate()
{
if(! allocated) PairLJCharmmCoulCharmmImplicit::allocate();
if(! allocated2)
{
allocated2 = true;
cuda->shared_data.pair.coeff1 = lj1;
cuda->shared_data.pair.coeff2 = lj2;
cuda->shared_data.pair.coeff3 = lj3;
cuda->shared_data.pair.coeff4 = lj4;
cuda->shared_data.pair.special_lj = force->special_lj;
cuda->shared_data.pair.special_coul = force->special_coul;
cu_lj1_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm, (atom->ntypes+1)*(atom->ntypes+1));
cu_lj2_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm, (atom->ntypes+1)*(atom->ntypes+1));
cu_lj3_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm, (atom->ntypes+1)*(atom->ntypes+1));
cu_lj4_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm, (atom->ntypes+1)*(atom->ntypes+1));
}
}
/* ---------------------------------------------------------------------- */
void PairLJCharmmCoulCharmmImplicitCuda::compute(int eflag, int vflag)
{
  if (eflag || vflag) ev_setup(eflag,vflag);

  // the global energy / virial buffers are only transferred here while
  // collect_forces_later is not set; the flag is re-read after the call
  if (!cuda->shared_data.pair.collect_forces_later) {
    if (eflag) {
      cuda->cu_eng_vdwl->upload();
      cuda->cu_eng_coul->upload();
    }
    if (vflag) cuda->cu_virial->upload();
  }

  Cuda_PairLJCharmmCoulCharmmImplicitCuda(&cuda->shared_data,
                                          &cuda_neigh_list->sneighlist,
                                          eflag, vflag, eflag_atom, vflag_atom,
                                          denom_lj, cut_coul_innersq, denom_coul);

  if (!cuda->shared_data.pair.collect_forces_later) {
    if (eflag) {
      cuda->cu_eng_vdwl->download();
      cuda->cu_eng_coul->download();
    }
    if (vflag) cuda->cu_virial->download();
  }
}
/* ---------------------------------------------------------------------- */
void PairLJCharmmCoulCharmmImplicitCuda::settings(int narg, char **arg)
{
PairLJCharmmCoulCharmmImplicit::settings(narg, arg);
cuda->shared_data.pair.cut_global = (X_FLOAT) cut_lj;
cuda->shared_data.pair.cut_coulsq_global = (X_FLOAT) cut_coulsq;
cuda->shared_data.pair.cut_inner_global = (F_FLOAT) cut_lj_inner;
}
/* ---------------------------------------------------------------------- */
void PairLJCharmmCoulCharmmImplicitCuda::coeff(int narg, char **arg)
{
  // the base class processes the arguments first ...
  PairLJCharmmCoulCharmmImplicit::coeff(narg,arg);

  // ... then the cuda shared pair data is set up (a no-op after the first call)
  allocate();
}
void PairLJCharmmCoulCharmmImplicitCuda::init_style()
{
if (!atom->q_flag)
error->all("Pair style lj/charmm/coul/long requires atom attribute q");
// request regular or rRESPA neighbor lists
int irequest;
irequest = neighbor->request(this);
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->cudable = 1;
if (cut_lj_inner >= cut_lj || cut_coul_inner >= cut_coul)
error->all("Pair inner cutoff >= Pair outer cutoff");
cut_lj_innersq = cut_lj_inner * cut_lj_inner;
cut_ljsq = cut_lj * cut_lj;
cut_coul_innersq = cut_coul_inner * cut_coul_inner;
cut_coulsq = cut_coul * cut_coul;
cut_bothsq = MAX(cut_ljsq,cut_coulsq);
denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) *
(cut_ljsq-cut_lj_innersq);
denom_coul = (cut_coulsq-cut_coul_innersq) * (cut_coulsq-cut_coul_innersq) *
(cut_coulsq-cut_coul_innersq);
cut_coulsq = cut_coul * cut_coul;
cuda->shared_data.pair.cut_coulsq_global=cut_coulsq;
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
}
/* ----------------------------------------------------------------------
   pass the list on to the base class and, unless CUDA_USE_BINNING is
   defined, register the list with id 0 with the Cuda object
------------------------------------------------------------------------- */
void PairLJCharmmCoulCharmmImplicitCuda::init_list(int id, NeighList *ptr)
{
  MYDBG(printf("# CUDA PairLJCharmmCoulCharmmImplicitCuda::init_list\n");)

  PairLJCharmmCoulCharmmImplicit::init_list(id, ptr);

#ifndef CUDA_USE_BINNING
  // only the verlet list (id 0) can be handled right now, not respa;
  // see Neighbor::init() for details on lammps lists' logic
  const bool verlet_list = (id == 0);
  if (verlet_list) cuda_neigh_list = cuda->registerNeighborList(ptr);
#endif

  MYDBG(printf("# CUDA PairLJCharmmCoulCharmmImplicitCuda::init_list end\n");)
}
void PairLJCharmmCoulCharmmImplicitCuda::ev_setup(int eflag, int vflag)
{
int maxeatomold=maxeatom;
PairLJCharmmCoulCharmmImplicit::ev_setup(eflag,vflag);
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
}

View File

@ -0,0 +1,62 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lj/charmm/coul/charmm/implicit/cuda,PairLJCharmmCoulCharmmImplicitCuda)
#else
#ifndef LMP_PAIR_LJ_CHARMM_COUL_CHARMM_IMPLICIT_CUDA_H
#define LMP_PAIR_LJ_CHARMM_COUL_CHARMM_IMPLICIT_CUDA_H
#include "pair_lj_charmm_coul_charmm_implicit.h"
#include "cuda_data.h"
namespace LAMMPS_NS {
// Pair style class derived from PairLJCharmmCoulCharmmImplicit; it is the
// class registered for lj/charmm/coul/charmm/implicit/cuda by the PairStyle
// entry at the top of this header.
class PairLJCharmmCoulCharmmImplicitCuda : public PairLJCharmmCoulCharmmImplicit
{
public:
// Constructor taking the LAMMPS instance.
PairLJCharmmCoulCharmmImplicitCuda(class LAMMPS *);
// Entry points re-declared by this class (same names as in the base class).
void compute(int, int);
void settings(int, char **);
void coeff(int, char **);
void init_list(int, class NeighList *);
void init_style();
void ev_setup(int eflag, int vflag);
protected:
// Cuda object used by the member functions of this class.
class Cuda *cuda;
void allocate();
// NOTE(review): presumably a "CUDA-side setup done" flag used by allocate(),
// as in the sibling /cuda pair styles -- confirm in the .cpp file.
bool allocated2;
// Set in init_list() from cuda->registerNeighborList() for list id 0.
class CudaNeighList* cuda_neigh_list;
// NOTE(review): presumably wrappers around the lj1..lj4 coefficient tables,
// as in the sibling lj/charmm/coul/long/cuda style -- confirm in the .cpp file.
cCudaData<double , F_FLOAT , x >* cu_lj1_gm;
cCudaData<double , F_FLOAT , x >* cu_lj2_gm;
cCudaData<double , F_FLOAT , x >* cu_lj3_gm;
cCudaData<double , F_FLOAT , x >* cu_lj4_gm;
};
}
#endif
#endif

View File

@ -0,0 +1,201 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
Contributing author: Paul Crozier (SNL)
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_lj_charmm_coul_long_cuda.h"
#include "pair_lj_charmm_coul_long_cuda_cu.h"
#include "cuda_data.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "kspace.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "cuda_neigh_list.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   constructor: pick up the Cuda object of the LAMMPS instance and flag
   the pair force as cudable, with forces collected later
------------------------------------------------------------------------- */
PairLJCharmmCoulLongCuda::PairLJCharmmCoulLongCuda(LAMMPS *lmp)
  : PairLJCharmmCoulLong(lmp)
{
  cuda = lmp->cuda;

  // NOTE(review): the statements below dereference cuda, so this relies on
  // error->all() not returning -- confirm.
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  // allocate() has not yet done its CUDA-side setup.
  allocated2 = false;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->shared_data.pair.collect_forces_later = 1;

  cuda->setSystemParams();
}
/* ----------------------------------------------------------------------
remember pointer to arrays in cuda shared data
------------------------------------------------------------------------- */
void PairLJCharmmCoulLongCuda::allocate()
{
  // Base-class allocation, guarded by the base-class flag.
  if (!allocated) {
    PairLJCharmmCoulLong::allocate();
  }

  // Everything below runs at most once per object.
  if (allocated2) return;
  allocated2 = true;

  //cuda->shared_data.pair.cut = cut_lj;

  // Publish the coefficient tables and related pointers in the shared data.
  cuda->shared_data.pair.coeff1 = lj1;
  cuda->shared_data.pair.coeff2 = lj2;
  cuda->shared_data.pair.coeff3 = lj3;
  cuda->shared_data.pair.coeff4 = lj4;
  cuda->shared_data.pair.offset = offset;
  cuda->shared_data.pair.special_lj = force->special_lj;
  cuda->shared_data.pair.special_coul = force->special_coul;

  // One cCudaData wrapper per coefficient table, each sized
  // (ntypes+1)*(ntypes+1) and tied to the matching coeffN_gm entry.
  cu_lj1_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm,
                                                 (atom->ntypes+1)*(atom->ntypes+1));
  cu_lj2_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm,
                                                 (atom->ntypes+1)*(atom->ntypes+1));
  cu_lj3_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm,
                                                 (atom->ntypes+1)*(atom->ntypes+1));
  cu_lj4_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm,
                                                 (atom->ntypes+1)*(atom->ntypes+1));
}
/* ---------------------------------------------------------------------- */
void PairLJCharmmCoulLongCuda::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  }

  // Upload the accumulators only when forces are not collected later.
  if (!cuda->shared_data.pair.collect_forces_later) {
    if (eflag) {
      cuda->cu_eng_vdwl->upload();
      cuda->cu_eng_coul->upload();
    }
    if (vflag) {
      cuda->cu_virial->upload();
    }
  }

  Cuda_PairLJCharmmCoulLongCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist,
                                eflag, vflag, eflag_atom, vflag_atom, denom_lj);

  // The flag is read again here rather than cached, since shared_data was
  // handed to the call above by address.
  if (!cuda->shared_data.pair.collect_forces_later) {
    if (eflag) {
      cuda->cu_eng_vdwl->download();
      cuda->cu_eng_coul->download();
    }
    if (vflag) {
      cuda->cu_virial->download();
    }
  }
}
/* ---------------------------------------------------------------------- */
void PairLJCharmmCoulLongCuda::settings(int narg, char **arg)
{
PairLJCharmmCoulLong::settings(narg, arg);
cuda->shared_data.pair.cut_global = (X_FLOAT) cut_lj;
cuda->shared_data.pair.cut_coulsq_global = (X_FLOAT) cut_coulsq;
cuda->shared_data.pair.cut_inner_global = (F_FLOAT) cut_lj_inner;
}
/* ---------------------------------------------------------------------- */
void PairLJCharmmCoulLongCuda::coeff(int narg, char **arg)
{
PairLJCharmmCoulLong::coeff(narg, arg);
allocate();
}
void PairLJCharmmCoulLongCuda::init_style()
{
if (!atom->q_flag)
error->all("Pair style lj/charmm/coul/long requires atom attribute q");
// request regular or rRESPA neighbor lists
int irequest;
irequest = neighbor->request(this);
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->cudable = 1;
if (cut_lj_inner >= cut_lj)
error->all("Pair inner cutoff >= Pair outer cutoff");
cut_lj_innersq = cut_lj_inner * cut_lj_inner;
cut_ljsq = cut_lj * cut_lj;
cut_coulsq = cut_coul * cut_coul;
cut_bothsq = MAX(cut_ljsq,cut_coulsq);
denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) *
(cut_ljsq-cut_lj_innersq);
cut_coulsq = cut_coul * cut_coul;
cuda->shared_data.pair.cut_coulsq_global=cut_coulsq;
if (force->kspace == NULL)
error->all("Pair style is incompatible with KSpace style");
g_ewald = force->kspace->g_ewald;
cuda->shared_data.pair.g_ewald=g_ewald;
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
if(ncoultablebits) error->warning("# CUDA: You asked for the useage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n");
}
/* ----------------------------------------------------------------------
   forward the neighbor list to the base class and, for list id 0,
   register it with the Cuda object
------------------------------------------------------------------------- */
void PairLJCharmmCoulLongCuda::init_list(int id, NeighList *ptr)
{
  MYDBG(printf("# CUDA PairLJCharmmCoulLongCuda::init_list\n");)

  // Base-class handling comes first.
  PairLJCharmmCoulLong::init_list(id, ptr);

#ifndef CUDA_USE_BINNING
  // Right now only the verlet list (id 0) can be handled, not respa.
  // See Neighbor::init() for details on the logic behind LAMMPS' lists.
  if (id == 0) {
    cuda_neigh_list = cuda->registerNeighborList(ptr);
  }
#endif

  MYDBG(printf("# CUDA PairLJCharmmCoulLongCuda::init_list end\n");)
}
void PairLJCharmmCoulLongCuda::ev_setup(int eflag, int vflag)
{
int maxeatomold=maxeatom;
PairLJCharmmCoulLong::ev_setup(eflag,vflag);
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
}

View File

@ -0,0 +1,62 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lj/charmm/coul/long/cuda,PairLJCharmmCoulLongCuda)
#else
#ifndef LMP_PAIR_LJ_CHARMM_COUL_LONG_CUDA_H
#define LMP_PAIR_LJ_CHARMM_COUL_LONG_CUDA_H
#include "pair_lj_charmm_coul_long.h"
#include "cuda_data.h"
namespace LAMMPS_NS {
// Pair style class derived from PairLJCharmmCoulLong; it is the class
// registered for lj/charmm/coul/long/cuda by the PairStyle entry at the top
// of this header.
class PairLJCharmmCoulLongCuda : public PairLJCharmmCoulLong
{
public:
// Constructor taking the LAMMPS instance; stores lmp->cuda in `cuda`.
PairLJCharmmCoulLongCuda(class LAMMPS *);
// Calls Cuda_PairLJCharmmCoulLongCuda, with energy/virial upload and
// download around it unless collect_forces_later is set.
void compute(int, int);
// Call the base-class version, then copy cutoffs into the cuda shared data.
void settings(int, char **);
// Call the base-class version, then allocate().
void coeff(int, char **);
// Call the base-class version; list id 0 is registered with the Cuda object.
void init_list(int, class NeighList *);
void init_style();
void ev_setup(int eflag, int vflag);
protected:
// Cuda object taken from the LAMMPS instance in the constructor.
class Cuda *cuda;
void allocate();
// True once allocate() has done its one-time CUDA-side setup.
bool allocated2;
// Set in init_list() from cuda->registerNeighborList() for list id 0.
class CudaNeighList* cuda_neigh_list;
// cCudaData wrappers created in allocate() around lj1..lj4.
cCudaData<double , F_FLOAT , x >* cu_lj1_gm;
cCudaData<double , F_FLOAT , x >* cu_lj2_gm;
cCudaData<double , F_FLOAT , x >* cu_lj3_gm;
cCudaData<double , F_FLOAT , x >* cu_lj4_gm;
};
}
#endif
#endif

View File

@ -0,0 +1,167 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
Contributing author: Paul Crozier (SNL)
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_lj_class2_coul_cut_cuda.h"
#include "pair_lj_class2_coul_cut_cuda_cu.h"
#include "cuda_data.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "kspace.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "cuda_neigh_list.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   constructor: pick up the Cuda object of the LAMMPS instance and flag
   the pair force as cudable
------------------------------------------------------------------------- */
PairLJClass2CoulCutCuda::PairLJClass2CoulCutCuda(LAMMPS *lmp)
  : PairLJClass2CoulCut(lmp)
{
  cuda = lmp->cuda;

  // NOTE(review): the statements below dereference cuda, so this relies on
  // error->all() not returning -- confirm.
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  // allocate() has not yet done its CUDA-side setup.
  allocated2 = false;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
/* ----------------------------------------------------------------------
remember pointer to arrays in cuda shared data
------------------------------------------------------------------------- */
void PairLJClass2CoulCutCuda::allocate()
{
  // Base-class allocation, guarded by the base-class flag.
  if (!allocated) {
    PairLJClass2CoulCut::allocate();
  }

  // Everything below runs at most once per object.
  if (allocated2) return;
  allocated2 = true;

  // Cutoff tables.
  cuda->shared_data.pair.cut = cut_lj;
  cuda->shared_data.pair.cut_coul = cut_coul;

  // Coefficient tables and offset.
  cuda->shared_data.pair.coeff1 = lj1;
  cuda->shared_data.pair.coeff2 = lj2;
  cuda->shared_data.pair.coeff3 = lj3;
  cuda->shared_data.pair.coeff4 = lj4;
  cuda->shared_data.pair.offset = offset;

  // Special-bond factors come from the Force object.
  cuda->shared_data.pair.special_lj = force->special_lj;
  cuda->shared_data.pair.special_coul = force->special_coul;
}
/* ---------------------------------------------------------------------- */
void PairLJClass2CoulCutCuda::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  }

  // Uploads happen whenever the corresponding flag is set.
  if (eflag) {
    cuda->cu_eng_vdwl->upload();
    cuda->cu_eng_coul->upload();
  }
  if (vflag) {
    cuda->cu_virial->upload();
  }

  Cuda_PairLJClass2CoulCutCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist,
                               eflag, vflag, eflag_atom, vflag_atom);

  // Downloads are skipped when forces are collected later.
  if (!cuda->shared_data.pair.collect_forces_later) {
    if (eflag) {
      cuda->cu_eng_vdwl->download();
      cuda->cu_eng_coul->download();
    }
    if (vflag) {
      cuda->cu_virial->download();
    }
  }
}
/* ---------------------------------------------------------------------- */
void PairLJClass2CoulCutCuda::settings(int narg, char **arg)
{
PairLJClass2CoulCut::settings(narg, arg);
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
cuda->shared_data.pair.cut_coul_global = (F_FLOAT) cut_coul_global;
}
/* ---------------------------------------------------------------------- */
void PairLJClass2CoulCutCuda::coeff(int narg, char **arg)
{
PairLJClass2CoulCut::coeff(narg, arg);
allocate();
}
void PairLJClass2CoulCutCuda::init_style()
{
if (!atom->q_flag)
error->all("Pair style lj/cut/coul/cut/cuda requires atom attribute q");
// request regular or rRESPA neighbor lists
int irequest;
irequest = neighbor->request(this);
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->cudable = 1;
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
}
/* ----------------------------------------------------------------------
   forward the neighbor list to the base class and, for list id 0,
   register it with the Cuda object
------------------------------------------------------------------------- */
void PairLJClass2CoulCutCuda::init_list(int id, NeighList *ptr)
{
  MYDBG(printf("# CUDA PairLJClass2CoulCutCuda::init_list\n");)

  // Base-class handling comes first.
  PairLJClass2CoulCut::init_list(id, ptr);

#ifndef CUDA_USE_BINNING
  // Right now only the verlet list (id 0) can be handled, not respa.
  // See Neighbor::init() for details on the logic behind LAMMPS' lists.
  if (id == 0) {
    cuda_neigh_list = cuda->registerNeighborList(ptr);
  }
#endif

  MYDBG(printf("# CUDA PairLJClass2CoulCutCuda::init_list end\n");)
}
void PairLJClass2CoulCutCuda::ev_setup(int eflag, int vflag)
{
int maxeatomold=maxeatom;
PairLJClass2CoulCut::ev_setup(eflag,vflag);
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
}

View File

@ -0,0 +1,57 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lj/class2/coul/cut/cuda,PairLJClass2CoulCutCuda)
#else
#ifndef LMP_PAIR_LJ_CLASS2_COUL_CUT_CUDA_H
#define LMP_PAIR_LJ_CLASS2_COUL_CUT_CUDA_H
#include "pair_lj_class2_coul_cut.h"
namespace LAMMPS_NS {
// Pair style class derived from PairLJClass2CoulCut; it is the class
// registered for lj/class2/coul/cut/cuda by the PairStyle entry at the top
// of this header.
class PairLJClass2CoulCutCuda : public PairLJClass2CoulCut
{
public:
// Constructor taking the LAMMPS instance; stores lmp->cuda in `cuda`.
PairLJClass2CoulCutCuda(class LAMMPS *);
// Uploads energy/virial accumulators, calls Cuda_PairLJClass2CoulCutCuda,
// and downloads them again unless collect_forces_later is set.
void compute(int, int);
// Call the base-class version, then copy the global cutoffs into the cuda
// shared data.
void settings(int, char **);
// Call the base-class version, then allocate().
void coeff(int, char **);
// Call the base-class version; list id 0 is registered with the Cuda object.
void init_list(int, class NeighList *);
void init_style();
void ev_setup(int eflag, int vflag);
protected:
// Cuda object taken from the LAMMPS instance in the constructor.
class Cuda *cuda;
void allocate();
// True once allocate() has done its one-time CUDA-side setup.
bool allocated2;
// Set in init_list() from cuda->registerNeighborList() for list id 0.
class CudaNeighList* cuda_neigh_list;
};
}
#endif
#endif

View File

@ -0,0 +1,180 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
Contributing author: Paul Crozier (SNL)
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_lj_class2_coul_long_cuda.h"
#include "pair_lj_class2_coul_long_cuda_cu.h"
#include "cuda_data.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "kspace.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "cuda_neigh_list.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   constructor: pick up the Cuda object of the LAMMPS instance and flag
   the pair force as cudable
------------------------------------------------------------------------- */
PairLJClass2CoulLongCuda::PairLJClass2CoulLongCuda(LAMMPS *lmp)
  : PairLJClass2CoulLong(lmp)
{
  cuda = lmp->cuda;

  // NOTE(review): the statements below dereference cuda, so this relies on
  // error->all() not returning -- confirm.
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  // allocate() has not yet done its CUDA-side setup.
  allocated2 = false;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
/* ----------------------------------------------------------------------
remember pointer to arrays in cuda shared data
------------------------------------------------------------------------- */
void PairLJClass2CoulLongCuda::allocate()
{
  // Base-class allocation, guarded by the base-class flag.
  if (!allocated) {
    PairLJClass2CoulLong::allocate();
  }

  // Everything below runs at most once per object.
  if (allocated2) return;
  allocated2 = true;

  // Cutoff table.
  cuda->shared_data.pair.cut = cut_lj;

  // Coefficient tables and offset.
  cuda->shared_data.pair.coeff1 = lj1;
  cuda->shared_data.pair.coeff2 = lj2;
  cuda->shared_data.pair.coeff3 = lj3;
  cuda->shared_data.pair.coeff4 = lj4;
  cuda->shared_data.pair.offset = offset;

  // Special-bond factors come from the Force object.
  cuda->shared_data.pair.special_lj = force->special_lj;
  cuda->shared_data.pair.special_coul = force->special_coul;
}
/* ---------------------------------------------------------------------- */
void PairLJClass2CoulLongCuda::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  }

  // Uploads happen whenever the corresponding flag is set.
  if (eflag) {
    cuda->cu_eng_vdwl->upload();
    cuda->cu_eng_coul->upload();
  }
  if (vflag) {
    cuda->cu_virial->upload();
  }

  Cuda_PairLJClass2CoulLongCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist,
                                eflag, vflag, eflag_atom, vflag_atom);

  // Downloads are skipped when forces are collected later.
  if (!cuda->shared_data.pair.collect_forces_later) {
    if (eflag) {
      cuda->cu_eng_vdwl->download();
      cuda->cu_eng_coul->download();
    }
    if (vflag) {
      cuda->cu_virial->download();
    }
  }
}
/* ---------------------------------------------------------------------- */
void PairLJClass2CoulLongCuda::settings(int narg, char **arg)
{
PairLJClass2CoulLong::settings(narg, arg);
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
}
/* ---------------------------------------------------------------------- */
void PairLJClass2CoulLongCuda::coeff(int narg, char **arg)
{
PairLJClass2CoulLong::coeff(narg, arg);
allocate();
}
void PairLJClass2CoulLongCuda::init_style()
{
if (!atom->q_flag)
error->all("Pair style lj/cut/coul/long requires atom attribute q");
// request regular or rRESPA neighbor lists
int irequest;
irequest = neighbor->request(this);
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->cudable = 1;
cut_coulsq = cut_coul * cut_coul;
cuda->shared_data.pair.cut_coul_global=cut_coul;
cuda->shared_data.pair.cut_coulsq_global=cut_coulsq;
// set rRESPA cutoffs
if (force->newton) error->warning("Pair style uses does not use \"newton\" setting. You might test if \"newton off\" makes the simulation run faster.");
if (force->kspace == NULL)
error->all("Pair style is incompatible with KSpace style");
g_ewald = force->kspace->g_ewald;
cuda->shared_data.pair.g_ewald=g_ewald;
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
if(ncoultablebits) error->warning("# CUDA: You asked for the useage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n");
}
/* ----------------------------------------------------------------------
   forward the neighbor list to the base class and, for list id 0,
   register it with the Cuda object
------------------------------------------------------------------------- */
void PairLJClass2CoulLongCuda::init_list(int id, NeighList *ptr)
{
  MYDBG(printf("# CUDA PairLJClass2CoulLongCuda::init_list\n");)

  // Base-class handling comes first.
  PairLJClass2CoulLong::init_list(id, ptr);

  // Only list id 0 is registered.
  // See Neighbor::init() for details on the logic behind LAMMPS' lists.
  if (id == 0) {
    cuda_neigh_list = cuda->registerNeighborList(ptr);
  }

  MYDBG(printf("# CUDA PairLJClass2CoulLongCuda::init_list end\n");)
}
void PairLJClass2CoulLongCuda::ev_setup(int eflag, int vflag)
{
int maxeatomold=maxeatom;
PairLJClass2CoulLong::ev_setup(eflag,vflag);
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
}

View File

@ -0,0 +1,57 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lj/class2/coul/long/cuda,PairLJClass2CoulLongCuda)
#else
#ifndef LMP_PAIR_LJ_CLASS2_COUL_LONG_CUDA_H
#define LMP_PAIR_LJ_CLASS2_COUL_LONG_CUDA_H
#include "pair_lj_class2_coul_long.h"
namespace LAMMPS_NS {
// Pair style class derived from PairLJClass2CoulLong; it is the class
// registered for lj/class2/coul/long/cuda by the PairStyle entry at the top
// of this header.
class PairLJClass2CoulLongCuda : public PairLJClass2CoulLong
{
public:
// Constructor taking the LAMMPS instance; stores lmp->cuda in `cuda`.
PairLJClass2CoulLongCuda(class LAMMPS *);
// Uploads energy/virial accumulators, calls Cuda_PairLJClass2CoulLongCuda,
// and downloads them again unless collect_forces_later is set.
void compute(int, int);
// Call the base-class version, then copy the global LJ cutoff into the cuda
// shared data.
void settings(int, char **);
// Call the base-class version, then allocate().
void coeff(int, char **);
// Call the base-class version; list id 0 is registered with the Cuda object.
void init_list(int, class NeighList *);
void init_style();
void ev_setup(int eflag, int vflag);
protected:
// Cuda object taken from the LAMMPS instance in the constructor.
class Cuda *cuda;
void allocate();
// True once allocate() has done its one-time CUDA-side setup.
bool allocated2;
// Set in init_list() from cuda->registerNeighborList() for list id 0.
class CudaNeighList* cuda_neigh_list;
};
}
#endif
#endif

View File

@ -0,0 +1,172 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Paul Crozier (SNL)
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_lj_class2_cuda.h"
#include "pair_lj_class2_cuda_cu.h"
#include "cuda_data.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "cuda_neigh_list.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   constructor: pick up the Cuda object of the LAMMPS instance and flag
   the pair force as cudable
------------------------------------------------------------------------- */
PairLJClass2Cuda::PairLJClass2Cuda(LAMMPS *lmp)
  : PairLJClass2(lmp)
{
  cuda = lmp->cuda;

  // NOTE(review): the statements below dereference cuda, so this relies on
  // error->all() not returning -- confirm.
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  // allocate() has not yet done its CUDA-side setup.
  allocated2 = false;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
/* ----------------------------------------------------------------------
remember pointer to arrays in cuda shared data
------------------------------------------------------------------------- */
void PairLJClass2Cuda::allocate()
{
  // Base-class allocation, guarded by the base-class flag.
  if (!allocated) {
    PairLJClass2::allocate();
  }

  // Everything below runs at most once per object.
  if (allocated2) return;
  allocated2 = true;

  // Cutoff table.
  cuda->shared_data.pair.cut = cut;

  // Coefficient tables and offset.
  cuda->shared_data.pair.coeff1 = lj1;
  cuda->shared_data.pair.coeff2 = lj2;
  cuda->shared_data.pair.coeff3 = lj3;
  cuda->shared_data.pair.coeff4 = lj4;
  cuda->shared_data.pair.offset = offset;

  // Special-bond factors come from the Force object.
  cuda->shared_data.pair.special_lj = force->special_lj;
  cuda->shared_data.pair.special_coul = force->special_coul;
}
/* ---------------------------------------------------------------------- */
void PairLJClass2Cuda::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  }

  // Uploads happen whenever the corresponding flag is set.
  if (eflag) {
    cuda->cu_eng_vdwl->upload();
  }
  if (vflag) {
    cuda->cu_virial->upload();
  }

  Cuda_PairLJClass2Cuda(& cuda->shared_data, & cuda_neigh_list->sneighlist,
                        eflag, vflag, eflag_atom, vflag_atom);

  // Downloads are skipped when forces are collected later.
  if (!cuda->shared_data.pair.collect_forces_later) {
    if (eflag) {
      cuda->cu_eng_vdwl->download();
    }
    if (vflag) {
      cuda->cu_virial->download();
    }
  }
}
/* ---------------------------------------------------------------------- */
void PairLJClass2Cuda::settings(int narg, char **arg)
{
PairLJClass2::settings(narg, arg);
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_global;
}
/* ---------------------------------------------------------------------- */
void PairLJClass2Cuda::coeff(int narg, char **arg)
{
PairLJClass2::coeff(narg, arg);
allocate();
}
void PairLJClass2Cuda::init_style()
{
MYDBG(printf("# CUDA PairLJClass2Cuda::init_style start\n"); )
// request regular or rRESPA neighbor lists
int irequest;
irequest = neighbor->request(this);
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->cudable = 1;
//neighbor->style=0; //0=NSQ neighboring
MYDBG(printf("# CUDA PairLJClass2Cuda::init_style end\n"); )
}
/* ----------------------------------------------------------------------
   forward the neighbor list to the base class and, for list id 0,
   register it with the Cuda object
------------------------------------------------------------------------- */
void PairLJClass2Cuda::init_list(int id, NeighList *ptr)
{
  MYDBG(printf("# CUDA PairLJClass2Cuda::init_list\n");)

  // Base-class handling comes first.
  PairLJClass2::init_list(id, ptr);

  // Only list id 0 is registered.
  // See Neighbor::init() for details on the logic behind LAMMPS' lists.
  if (id == 0) {
    cuda_neigh_list = cuda->registerNeighborList(ptr);
  }

  MYDBG(printf("# CUDA PairLJClass2Cuda::init_list end\n");)
}
void PairLJClass2Cuda::ev_setup(int eflag, int vflag)
{
int maxeatomold=maxeatom;
PairLJClass2::ev_setup(eflag,vflag);
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
}

View File

@ -0,0 +1,57 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lj/class2/cuda,PairLJClass2Cuda)
#else
#ifndef PAIR_LJ_CLASS2_CUDA_H
#define PAIR_LJ_CLASS2_CUDA_H
#include "pair_lj_class2.h"
namespace LAMMPS_NS {
// Pair style class derived from PairLJClass2; it is the class registered
// for lj/class2/cuda by the PairStyle entry at the top of this header.
class PairLJClass2Cuda : public PairLJClass2
{
public:
// Constructor taking the LAMMPS instance; stores lmp->cuda in `cuda`.
PairLJClass2Cuda(class LAMMPS *);
// Uploads energy/virial accumulators, calls Cuda_PairLJClass2Cuda, and
// downloads them again unless collect_forces_later is set.
void compute(int, int);
// Call the base-class version, then copy the global cutoff into the cuda
// shared data.
void settings(int, char **);
// Call the base-class version, then allocate().
void coeff(int, char **);
// Call the base-class version; list id 0 is registered with the Cuda object.
void init_list(int, class NeighList *);
void init_style();
void ev_setup(int eflag, int vflag);
protected:
// Cuda object taken from the LAMMPS instance in the constructor.
class Cuda *cuda;
void allocate();
// True once allocate() has done its one-time CUDA-side setup.
bool allocated2;
// Set in init_list() from cuda->registerNeighborList() for list id 0.
class CudaNeighList* cuda_neigh_list;
};
}
#endif
#endif

View File

@ -0,0 +1,167 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
Contributing author: Paul Crozier (SNL)
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_lj_cut_coul_cut_cuda.h"
#include "pair_lj_cut_coul_cut_cuda_cu.h"
#include "cuda_data.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "kspace.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "cuda_neigh_list.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* ---------------------------------------------------------------------- */
/* Construct the CUDA variant of the lj/cut/coul/cut pair style.
   lmp : owning LAMMPS instance; its 'cuda' member must be non-NULL,
         otherwise the problem is reported through error->all().
   On success the pair force is flagged as CUDA-capable in the shared data
   and Cuda::setSystemParams() is called. */
PairLJCutCoulCutCuda::PairLJCutCoulCutCuda(LAMMPS *lmp) : PairLJCutCoulCut(lmp)
{
  // init_list() assigns the neighbor list handle only for list id 0 (and
  // never when CUDA_USE_BINNING is defined), while compute() dereferences
  // it unconditionally: start from a defined value instead of garbage
  cuda_neigh_list = NULL;
  allocated2 = false;

  cuda = lmp->cuda;
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
/* ----------------------------------------------------------------------
   make sure the base-class arrays exist, then record (once) the pointers
   to the cutoff / coefficient / offset / special-factor arrays in the
   cuda shared data
------------------------------------------------------------------------- */
void PairLJCutCoulCutCuda::allocate()
{
  if (!allocated)
    PairLJCutCoulCut::allocate();

  // pointers were already handed over on an earlier call
  if (allocated2)
    return;
  allocated2 = true;

  // cutoffs
  cuda->shared_data.pair.cut      = cut_lj;
  cuda->shared_data.pair.cut_coul = cut_coul;

  // LJ coefficient tables and energy offset
  cuda->shared_data.pair.coeff1 = lj1;
  cuda->shared_data.pair.coeff2 = lj2;
  cuda->shared_data.pair.coeff3 = lj3;
  cuda->shared_data.pair.coeff4 = lj4;
  cuda->shared_data.pair.offset = offset;

  // special-bond scaling factors owned by Force
  cuda->shared_data.pair.special_lj   = force->special_lj;
  cuda->shared_data.pair.special_coul = force->special_coul;
}
/* ---------------------------------------------------------------------- */
/* Run the lj/cut/coul/cut force computation through the CUDA wrapper.
   eflag, vflag : energy / virial request flags. When either is set,
   ev_setup() is called first. The energy and virial accumulators are
   uploaded before the call to Cuda_PairLJCutCoulCutCuda() and downloaded
   afterwards, unless shared_data.pair.collect_forces_later is set. */
void PairLJCutCoulCutCuda::compute(int eflag, int vflag)
{
  if (eflag || vflag)
    ev_setup(eflag, vflag);

  // push the requested accumulators
  if (eflag) {
    cuda->cu_eng_vdwl->upload();
    cuda->cu_eng_coul->upload();
  }
  if (vflag)
    cuda->cu_virial->upload();

  Cuda_PairLJCutCoulCutCuda(&cuda->shared_data,
                            &cuda_neigh_list->sneighlist,
                            eflag, vflag, eflag_atom, vflag_atom);

  // results are fetched elsewhere when collection is deferred
  if (cuda->shared_data.pair.collect_forces_later)
    return;

  // fetch the requested accumulators
  if (eflag) {
    cuda->cu_eng_vdwl->download();
    cuda->cu_eng_coul->download();
  }
  if (vflag)
    cuda->cu_virial->download();
}
/* ---------------------------------------------------------------------- */
void PairLJCutCoulCutCuda::settings(int narg, char **arg)
{
PairLJCutCoulCut::settings(narg, arg);
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
cuda->shared_data.pair.cut_coul_global = (F_FLOAT) cut_coul_global;
}
/* ---------------------------------------------------------------------- */
void PairLJCutCoulCutCuda::coeff(int narg, char **arg)
{
PairLJCutCoulCut::coeff(narg, arg);
allocate();
}
/* Style-specific initialisation: verifies that atoms carry a charge,
   files a neighbor list request marked full + cudable, and copies
   force->qqrd2e into the cuda shared data. */
void PairLJCutCoulCutCuda::init_style()
{
  if (!atom->q_flag)
    error->all("Pair style lj/cut/coul/cut/cuda requires atom attribute q");

  // ask for a full (not half) neighbor list that is flagged as cudable
  const int irequest = neighbor->request(this);
  neighbor->requests[irequest]->half = 0;
  neighbor->requests[irequest]->full = 1;
  neighbor->requests[irequest]->cudable = 1;

  cuda->shared_data.pppm.qqrd2e = force->qqrd2e;
}
/* Accept a neighbor list.
   id  : list id, forwarded to the base class; only id 0 is registered
         with the Cuda object
   ptr : the neighbor list
   The registration is compiled out when CUDA_USE_BINNING is defined. */
void PairLJCutCoulCutCuda::init_list(int id, NeighList *ptr)
{
  MYDBG(printf("# CUDA PairLJCutCoulCutCuda::init_list\n");)

  PairLJCutCoulCut::init_list(id, ptr);

#ifndef CUDA_USE_BINNING
  // only the verlet list (id 0) can be handled for now, respa lists cannot;
  // Neighbor::init() describes the logic behind the list ids
  if (id == 0) {
    cuda_neigh_list = cuda->registerNeighborList(ptr);
  }
#endif

  MYDBG(printf("# CUDA PairLJCutCoulCutCuda::init_list end\n");)
}
void PairLJCutCoulCutCuda::ev_setup(int eflag, int vflag)
{
int maxeatomold=maxeatom;
PairLJCutCoulCut::ev_setup(eflag,vflag);
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
}

View File

@ -0,0 +1,57 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lj/cut/coul/cut/cuda,PairLJCutCoulCutCuda)
#else
#ifndef LMP_PAIR_LJ_CUT_COUL_CUT_CUDA_H
#define LMP_PAIR_LJ_CUT_COUL_CUT_CUDA_H
#include "pair_lj_cut_coul_cut.h"
namespace LAMMPS_NS {
// CUDA variant of PairLJCutCoulCut; the PairStyle entry earlier in this
// header registers it under the name lj/cut/coul/cut/cuda.
// Each method first defers to the PairLJCutCoulCut implementation where one
// is called, then mirrors the relevant state into the Cuda object's shared data.
class PairLJCutCoulCutCuda : public PairLJCutCoulCut
{
public:
// takes the Cuda object from the LAMMPS instance (an error is raised when it
// is NULL) and flags the pair force as cudable
PairLJCutCoulCutCuda(class LAMMPS *);
// arguments are (eflag, vflag); uploads the energy/virial accumulators, calls
// Cuda_PairLJCutCoulCutCuda and downloads them again unless
// collect_forces_later is set in the shared pair data
void compute(int, int);
// base-class settings(), then copies the global LJ / Coulomb cutoffs into the
// shared pair data
void settings(int, char **);
// base-class coeff(), then allocate()
void coeff(int, char **);
// base-class init_list(); list id 0 is additionally registered with the Cuda
// object unless CUDA_USE_BINNING is defined
void init_list(int, class NeighList *);
// requires atom charges, requests a full cudable neighbor list, copies qqrd2e
void init_style();
// base-class ev_setup(), then refreshes the cuda per-atom energy / virial
// wrappers (cu_eatom, cu_vatom) when the per-atom arrays had to grow
void ev_setup(int eflag, int vflag);
protected:
// Cuda object taken from the LAMMPS instance in the constructor
class Cuda *cuda;
// ensures the base-class arrays exist and stores their pointers in the
// shared pair data (only once, see allocated2)
void allocate();
// true once allocate() has stored the array pointers in the shared data
bool allocated2;
// handle returned by Cuda::registerNeighborList() for list id 0
class CudaNeighList* cuda_neigh_list;
};
}
#endif
#endif

View File

@ -0,0 +1,168 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
Contributing author: Paul Crozier (SNL)
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_lj_cut_coul_debye_cuda.h"
#include "pair_lj_cut_coul_debye_cuda_cu.h"
#include "cuda_data.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "kspace.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "cuda_neigh_list.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
using namespace LAMMPS_NS;
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* ---------------------------------------------------------------------- */
/* Construct the CUDA variant of the lj/cut/coul/debye pair style.
   lmp : owning LAMMPS instance; its 'cuda' member must be non-NULL,
         otherwise the problem is reported through error->all().
   On success the pair force is flagged as CUDA-capable in the shared data
   and Cuda::setSystemParams() is called. */
PairLJCutCoulDebyeCuda::PairLJCutCoulDebyeCuda(LAMMPS *lmp) : PairLJCutCoulDebye(lmp)
{
  // init_list() assigns the neighbor list handle only for list id 0 (and
  // never when CUDA_USE_BINNING is defined), while compute() dereferences
  // it unconditionally: start from a defined value instead of garbage
  cuda_neigh_list = NULL;
  allocated2 = false;

  cuda = lmp->cuda;
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
/* ----------------------------------------------------------------------
   make sure the base-class arrays exist, then record (once) the pointers
   to the cutoff / coefficient / offset / special-factor arrays in the
   cuda shared data
------------------------------------------------------------------------- */
void PairLJCutCoulDebyeCuda::allocate()
{
  if (!allocated)
    PairLJCutCoulDebye::allocate();

  // pointers were already handed over on an earlier call
  if (allocated2)
    return;
  allocated2 = true;

  // cutoffs
  cuda->shared_data.pair.cut      = cut_lj;
  cuda->shared_data.pair.cut_coul = cut_coul;

  // LJ coefficient tables and energy offset
  cuda->shared_data.pair.coeff1 = lj1;
  cuda->shared_data.pair.coeff2 = lj2;
  cuda->shared_data.pair.coeff3 = lj3;
  cuda->shared_data.pair.coeff4 = lj4;
  cuda->shared_data.pair.offset = offset;

  // special-bond scaling factors owned by Force
  cuda->shared_data.pair.special_lj   = force->special_lj;
  cuda->shared_data.pair.special_coul = force->special_coul;
}
/* ---------------------------------------------------------------------- */
/* Run the lj/cut/coul/debye force computation through the CUDA wrapper.
   eflag, vflag : energy / virial request flags. When either is set,
   ev_setup() is called first. The energy and virial accumulators are
   uploaded before the call to Cuda_PairLJCutCoulDebyeCuda() and downloaded
   afterwards, unless shared_data.pair.collect_forces_later is set. */
void PairLJCutCoulDebyeCuda::compute(int eflag, int vflag)
{
  if (eflag || vflag)
    ev_setup(eflag, vflag);

  // push the requested accumulators
  if (eflag) {
    cuda->cu_eng_vdwl->upload();
    cuda->cu_eng_coul->upload();
  }
  if (vflag)
    cuda->cu_virial->upload();

  Cuda_PairLJCutCoulDebyeCuda(&cuda->shared_data,
                              &cuda_neigh_list->sneighlist,
                              eflag, vflag, eflag_atom, vflag_atom);

  // results are fetched elsewhere when collection is deferred
  if (cuda->shared_data.pair.collect_forces_later)
    return;

  // fetch the requested accumulators
  if (eflag) {
    cuda->cu_eng_vdwl->download();
    cuda->cu_eng_coul->download();
  }
  if (vflag)
    cuda->cu_virial->download();
}
/* ---------------------------------------------------------------------- */
void PairLJCutCoulDebyeCuda::settings(int narg, char **arg)
{
PairLJCutCoulDebye::settings(narg, arg);
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
cuda->shared_data.pair.cut_coul_global = (F_FLOAT) cut_coul_global;
cuda->shared_data.pair.kappa = (F_FLOAT) kappa;
}
/* ---------------------------------------------------------------------- */
void PairLJCutCoulDebyeCuda::coeff(int narg, char **arg)
{
PairLJCutCoulDebye::coeff(narg, arg);
allocate();
}
/* Style-specific initialisation: verifies that atoms carry a charge,
   files a neighbor list request marked full + cudable, and copies
   force->qqrd2e into the cuda shared data. */
void PairLJCutCoulDebyeCuda::init_style()
{
  if (!atom->q_flag)
    error->all("Pair style lj/cut/coul/debye/cuda requires atom attribute q");

  // ask for a full (not half) neighbor list that is flagged as cudable
  const int irequest = neighbor->request(this);
  neighbor->requests[irequest]->half = 0;
  neighbor->requests[irequest]->full = 1;
  neighbor->requests[irequest]->cudable = 1;

  cuda->shared_data.pppm.qqrd2e = force->qqrd2e;
}
/* Accept a neighbor list.
   id  : list id, forwarded to the base class; only id 0 is registered
         with the Cuda object
   ptr : the neighbor list
   The registration is compiled out when CUDA_USE_BINNING is defined. */
void PairLJCutCoulDebyeCuda::init_list(int id, NeighList *ptr)
{
  MYDBG(printf("# CUDA PairLJCutCoulDebyeCuda::init_list\n");)

  PairLJCutCoulDebye::init_list(id, ptr);

#ifndef CUDA_USE_BINNING
  // only the verlet list (id 0) can be handled for now, respa lists cannot;
  // Neighbor::init() describes the logic behind the list ids
  if (id == 0) {
    cuda_neigh_list = cuda->registerNeighborList(ptr);
  }
#endif

  MYDBG(printf("# CUDA PairLJCutCoulDebyeCuda::init_list end\n");)
}
void PairLJCutCoulDebyeCuda::ev_setup(int eflag, int vflag)
{
int maxeatomold=maxeatom;
PairLJCutCoulDebye::ev_setup(eflag,vflag);
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
if (eflag_atom && atom->nmax > maxeatomold)
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
}

Some files were not shown because too many files have changed in this diff Show More