forked from lijiext/lammps
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6261 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
3692e8c68b
commit
2ea09945bf
|
@ -4,31 +4,189 @@
|
|||
|
||||
if (test $1 = 1) then
|
||||
|
||||
if (test -e ../Makefile.package) then
|
||||
sed -i -e '/include ..\/..\/lib\/cuda\/Makefile.common/d' ../Makefile.package
|
||||
sed -i -e 's/-llammpscuda -lcuda -lcudart -lrt //' ../Makefile.package
|
||||
sed -i -e 's/-I..\/..\/lib\/cuda -I$(CUDA_INSTALL_PATH)\/include //' ../Makefile.package
|
||||
sed -i -e 's/-L..\/..\/lib\/cuda -L$(CUDA_INSTALL_PATH)\/lib64 -L$(CUDA_INSTALL_PATH)\/lib $(USRLIB_CONDITIONAL) -DLMP_USER_CUDA //' ../Makefile.package
|
||||
sed -i '1 i include ..\/..\/lib\/cuda\/Makefile.common' ../Makefile.package
|
||||
sed -i -e 's|^PKG_INC =[ \t]*|&-I..\/..\/lib\/cuda -I$(CUDA_INSTALL_PATH)\/include |' ../Makefile.package
|
||||
sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/cuda -L$(CUDA_INSTALL_PATH)\/lib64 -L$(CUDA_INSTALL_PATH)\/lib $(USRLIB_CONDITIONAL) |' ../Makefile.package
|
||||
sed -i -e 's|^PKG_LIB =[ \t]*|&-llammpscuda -lcuda -lcudart -lrt |' ../Makefile.package
|
||||
if (test ! -e ../Makefile.package) then
|
||||
cp ../Makefile.package.empty ../Makefile.package
|
||||
fi
|
||||
|
||||
sed -i -e '/^include.*cuda.*$/d' ../Makefile.package
|
||||
sed -i -e 's/[^ \t]*cuda[^ \t]* //g' ../Makefile.package
|
||||
sed -i -e 's/[^ \t]*CUDA[^ \t]* //g' ../Makefile.package
|
||||
sed -i -e 's/[^ \t]*lrt[^ \t]* //g' ../Makefile.package
|
||||
sed -i '4 i include ..\/..\/lib\/cuda\/Makefile.common' ../Makefile.package
|
||||
sed -i -e 's|^PKG_INC =[ \t]*|&-I..\/..\/lib\/cuda -DLMP_USER_CUDA |' ../Makefile.package
|
||||
sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/cuda |' ../Makefile.package
|
||||
sed -i -e 's|^PKG_LIB =[ \t]*|&-llammpscuda |' ../Makefile.package
|
||||
sed -i -e 's|^PKG_SYSINC =[ \t]*|&-I$(CUDA_INSTALL_PATH)\/include |' ../Makefile.package
|
||||
sed -i -e 's|^PKG_SYSPATH =[ \t]*|&-L$(CUDA_INSTALL_PATH)\/lib64 -L$(CUDA_INSTALL_PATH)\/lib $(CUDA_USRLIB_CONDITIONAL) |' ../Makefile.package
|
||||
sed -i -e 's|^PKG_SYSLIB =[ \t]*|&-lcuda -lcudart -lrt |' ../Makefile.package
|
||||
|
||||
if (test -e ../atom_vec_angle.cpp) then
|
||||
cp atom_vec_angle_cuda.cpp ..
|
||||
cp atom_vec_angle_cuda.h ..
|
||||
fi
|
||||
|
||||
if (test -e ../atom_vec_full.cpp) then
|
||||
cp atom_vec_full_cuda.cpp ..
|
||||
cp atom_vec_full_cuda.h ..
|
||||
fi
|
||||
|
||||
if (test -e ../fix_freeze.cpp) then
|
||||
cp fix_freeze_cuda.cpp ..
|
||||
cp fix_freeze_cuda.h ..
|
||||
fi
|
||||
|
||||
if (test -e ../pair_born_coul_long.cpp) then
|
||||
cp pair_born_coul_long_cuda.cpp ..
|
||||
cp pair_born_coul_long_cuda.h ..
|
||||
fi
|
||||
|
||||
if (test -e ../pair_buck_coul_long.cpp) then
|
||||
cp pair_buck_coul_long_cuda.cpp ..
|
||||
cp pair_buck_coul_long_cuda.h ..
|
||||
fi
|
||||
|
||||
if (test -e ../pair_cg_cmm.cpp) then
|
||||
cp pair_cg_cmm_cuda.cpp ..
|
||||
cp pair_cg_cmm_coul_cut_cuda.cpp ..
|
||||
cp pair_cg_cmm_coul_debye_cuda.cpp ..
|
||||
cp pair_cg_cmm_cuda.h ..
|
||||
cp pair_cg_cmm_coul_cut_cuda.h ..
|
||||
cp pair_cg_cmm_coul_debye_cuda.h ..
|
||||
fi
|
||||
|
||||
if (test -e ../pair_cg_cmm_coul_long.cpp) then
|
||||
cp pair_cg_cmm_coul_long_cuda.cpp ..
|
||||
cp pair_cg_cmm_coul_long_cuda.h ..
|
||||
fi
|
||||
|
||||
if (test -e ../pppm.cpp) then
|
||||
cp pppm_cuda.cpp ..
|
||||
cp fft3d_cuda.cpp ..
|
||||
cp fft3d_wrap_cuda.cpp ..
|
||||
cp pppm_cuda.h ..
|
||||
cp fft3d_cuda.h ..
|
||||
cp fft3d_wrap_cuda.h ..
|
||||
cp pair_lj_cut_coul_long_cuda.cpp ..
|
||||
cp pair_lj_cut_coul_long_cuda.h ..
|
||||
fi
|
||||
|
||||
|
||||
if (test -e ../pair_eam.cpp) then
|
||||
cp pair_eam_alloy_cuda.cpp ..
|
||||
cp pair_eam_cuda.cpp ..
|
||||
cp pair_eam_fs_cuda.cpp ..
|
||||
cp pair_eam_alloy_cuda.h ..
|
||||
cp pair_eam_cuda.h ..
|
||||
cp pair_eam_fs_cuda.h ..
|
||||
fi
|
||||
|
||||
if (test -e ../pair_gran_hooke.cpp) then
|
||||
cp pair_gran_hooke_cuda.cpp ..
|
||||
cp pair_gran_hooke_cuda.h ..
|
||||
fi
|
||||
|
||||
if (test -e ../pair_lj_charmm_coul_charmm.cpp) then
|
||||
cp pair_lj_charmm_coul_charmm_cuda.cpp ..
|
||||
cp pair_lj_charmm_coul_charmm_implicit_cuda.cpp ..
|
||||
cp pair_lj_charmm_coul_charmm_cuda.h ..
|
||||
cp pair_lj_charmm_coul_charmm_implicit_cuda.h ..
|
||||
if (test -e ../pair_lj_charmm_coul_long.cpp) then
|
||||
cp pair_lj_charmm_coul_long_cuda.cpp ..
|
||||
cp pair_lj_charmm_coul_long_cuda.h ..
|
||||
fi
|
||||
fi
|
||||
|
||||
if (test -e ../pair_lj_class2.cpp) then
|
||||
cp pair_lj_class2_coul_cut_cuda.cpp ..
|
||||
cp pair_lj_class2_cuda.cpp ..
|
||||
cp pair_lj_class2_coul_cut_cuda.h ..
|
||||
cp pair_lj_class2_cuda.h ..
|
||||
if (test -e ../pair_lj_class2_coul_long.cpp) then
|
||||
cp pair_lj_class2_coul_long_cuda.cpp ..
|
||||
cp pair_lj_class2_coul_long_cuda.h ..
|
||||
fi
|
||||
fi
|
||||
|
||||
cp atom_vec_atomic_cuda.cpp ..
|
||||
cp atom_vec_charge_cuda.cpp ..
|
||||
cp comm_cuda.cpp ..
|
||||
cp compute_pe_cuda.cpp ..
|
||||
cp compute_pressure_cuda.cpp ..
|
||||
cp compute_temp_cuda.cpp ..
|
||||
cp compute_temp_partial_cuda.cpp ..
|
||||
cp domain_cuda.cpp ..
|
||||
cp fix_addforce_cuda.cpp ..
|
||||
cp fix_aveforce_cuda.cpp ..
|
||||
cp fix_enforce2d_cuda.cpp ..
|
||||
cp fix_gravity_cuda.cpp ..
|
||||
cp fix_nh_cuda.cpp ..
|
||||
cp fix_npt_cuda.cpp ..
|
||||
cp fix_nve_cuda.cpp ..
|
||||
cp fix_nvt_cuda.cpp ..
|
||||
cp fix_set_force_cuda.cpp ..
|
||||
cp fix_shake_cuda.cpp ..
|
||||
cp fix_temp_berendsen_cuda.cpp ..
|
||||
cp fix_temp_rescale_cuda.cpp ..
|
||||
cp fix_temp_rescale_limit_cuda.cpp ..
|
||||
cp fix_viscous_cuda.cpp ..
|
||||
cp modify_cuda.cpp ..
|
||||
cp neighbor_cuda.cpp ..
|
||||
cp neigh_full_cuda.cpp ..
|
||||
cp pair_buck_coul_cut_cuda.cpp ..
|
||||
cp pair_buck_cuda.cpp ..
|
||||
cp pair_lj96_cut_cuda.cpp ..
|
||||
cp pair_lj_cut_coul_cut_cuda.cpp ..
|
||||
cp pair_lj_cut_coul_debye_cuda.cpp ..
|
||||
cp pair_lj_cut_cuda.cpp ..
|
||||
cp pair_lj_cut_experimental_cuda.cpp ..
|
||||
cp pair_lj_expand_cuda.cpp ..
|
||||
cp pair_lj_gromacs_coul_gromacs_cuda.cpp ..
|
||||
cp pair_lj_gromacs_cuda.cpp ..
|
||||
cp pair_lj_smooth_cuda.cpp ..
|
||||
cp pair_morse_cuda.cpp ..
|
||||
cp pppm_cuda.cpp ..
|
||||
cp verlet_cuda.cpp ..
|
||||
|
||||
cp cuda.cpp ..
|
||||
cp cuda_neigh_list.cpp ..
|
||||
|
||||
cp atom_vec_atomic_cuda.h ..
|
||||
cp atom_vec_charge_cuda.h ..
|
||||
cp comm_cuda.h ..
|
||||
cp compute_pe_cuda.h ..
|
||||
cp compute_pressure_cuda.h ..
|
||||
cp compute_temp_cuda.h ..
|
||||
cp compute_temp_partial_cuda.h ..
|
||||
cp domain_cuda.h ..
|
||||
cp fix_addforce_cuda.h ..
|
||||
cp fix_aveforce_cuda.h ..
|
||||
cp fix_enforce2d_cuda.h ..
|
||||
cp fix_gravity_cuda.h ..
|
||||
cp fix_nh_cuda.h ..
|
||||
cp fix_npt_cuda.h ..
|
||||
cp fix_nve_cuda.h ..
|
||||
cp fix_nvt_cuda.h ..
|
||||
cp fix_set_force_cuda.h ..
|
||||
cp fix_shake_cuda.h ..
|
||||
cp fix_temp_berendsen_cuda.h ..
|
||||
cp fix_temp_rescale_cuda.h ..
|
||||
cp fix_temp_rescale_limit_cuda.h ..
|
||||
cp fix_viscous_cuda.h ..
|
||||
cp modify_cuda.h ..
|
||||
cp neighbor_cuda.h ..
|
||||
cp pair_buck_coul_cut_cuda.h ..
|
||||
cp pair_buck_cuda.h ..
|
||||
|
||||
cp pair_lj96_cut_cuda.h ..
|
||||
cp pair_lj_cut_coul_cut_cuda.h ..
|
||||
cp pair_lj_cut_coul_debye_cuda.h ..
|
||||
cp pair_lj_cut_cuda.h ..
|
||||
cp pair_lj_cut_experimental_cuda.h ..
|
||||
cp pair_lj_expand_cuda.h ..
|
||||
cp pair_lj_gromacs_coul_gromacs_cuda.h ..
|
||||
cp pair_lj_gromacs_cuda.h ..
|
||||
cp pair_lj_smooth_cuda.h ..
|
||||
cp pair_morse_cuda.h ..
|
||||
cp verlet_cuda.h ..
|
||||
|
||||
cp cuda.h ..
|
||||
|
@ -42,26 +200,136 @@ if (test $1 = 1) then
|
|||
elif (test $1 = 0) then
|
||||
|
||||
if (test -e ../Makefile.package) then
|
||||
sed -i -e '/include ..\/..\/lib\/cuda\/Makefile.common/d' ../Makefile.package
|
||||
sed -i -e 's/-llammpscuda -lcuda -lcudart -lrt //' ../Makefile.package
|
||||
sed -i -e 's/-I..\/..\/lib\/cuda -I$(CUDA_INSTALL_PATH)\/include //' ../Makefile.package
|
||||
sed -i -e 's/-L..\/..\/lib\/cuda -L$(CUDA_INSTALL_PATH)\/lib64 -L$(CUDA_INSTALL_PATH)\/lib $(USRLIB_CONDITIONAL) -DLMP_USER_CUDA //' ../Makefile.package
|
||||
sed -i -e '/^include.*cuda.*$/d' ../Makefile.package
|
||||
sed -i -e 's/[^ \t]*cuda[^ \t]* //g' ../Makefile.package
|
||||
sed -i -e 's/[^ \t]*CUDA[^ \t]* //g' ../Makefile.package
|
||||
sed -i -e 's/[^ \t]*lrt[^ \t]* //g' ../Makefile.package
|
||||
fi
|
||||
|
||||
rm ../atom_vec_angle_cuda.cpp
|
||||
rm ../atom_vec_atomic_cuda.cpp
|
||||
rm ../atom_vec_charge_cuda.cpp
|
||||
rm ../atom_vec_full_cuda.cpp
|
||||
rm ../comm_cuda.cpp
|
||||
rm ../compute_pe_cuda.cpp
|
||||
rm ../compute_pressure_cuda.cpp
|
||||
rm ../compute_temp_cuda.cpp
|
||||
rm ../compute_temp_partial_cuda.cpp
|
||||
rm ../domain_cuda.cpp
|
||||
rm ../fft3d_cuda.cpp
|
||||
rm ../fft3d_wrap_cuda.cpp
|
||||
rm ../fix_addforce_cuda.cpp
|
||||
rm ../fix_aveforce_cuda.cpp
|
||||
rm ../fix_enforce2d_cuda.cpp
|
||||
rm ../fix_freeze_cuda.cpp
|
||||
rm ../fix_gravity_cuda.cpp
|
||||
rm ../fix_nh_cuda.cpp
|
||||
rm ../fix_npt_cuda.cpp
|
||||
rm ../fix_nve_cuda.cpp
|
||||
rm ../fix_nvt_cuda.cpp
|
||||
rm ../fix_set_force_cuda.cpp
|
||||
rm ../fix_shake_cuda.cpp
|
||||
rm ../fix_temp_berendsen_cuda.cpp
|
||||
rm ../fix_temp_rescale_cuda.cpp
|
||||
rm ../fix_temp_rescale_limit_cuda.cpp
|
||||
rm ../fix_viscous_cuda.cpp
|
||||
rm ../modify_cuda.cpp
|
||||
rm ../neighbor_cuda.cpp
|
||||
rm ../neigh_full_cuda.cpp
|
||||
rm ../pair_born_coul_long_cuda.cpp
|
||||
rm ../pair_buck_coul_cut_cuda.cpp
|
||||
rm ../pair_buck_coul_long_cuda.cpp
|
||||
rm ../pair_buck_cuda.cpp
|
||||
rm ../pair_cg_cmm_coul_cut_cuda.cpp
|
||||
rm ../pair_cg_cmm_coul_debye_cuda.cpp
|
||||
rm ../pair_cg_cmm_coul_long_cuda.cpp
|
||||
rm ../pair_cg_cmm_cuda.cpp
|
||||
rm ../pair_eam_alloy_cuda.cpp
|
||||
rm ../pair_eam_cuda.cpp
|
||||
rm ../pair_eam_fs_cuda.cpp
|
||||
rm ../pair_gran_hooke_cuda.cpp
|
||||
rm ../pair_lj96_cut_cuda.cpp
|
||||
rm ../pair_lj_charmm_coul_charmm_cuda.cpp
|
||||
rm ../pair_lj_charmm_coul_charmm_implicit_cuda.cpp
|
||||
rm ../pair_lj_charmm_coul_long_cuda.cpp
|
||||
rm ../pair_lj_class2_coul_cut_cuda.cpp
|
||||
rm ../pair_lj_class2_coul_long_cuda.cpp
|
||||
rm ../pair_lj_class2_cuda.cpp
|
||||
rm ../pair_lj_cut_coul_cut_cuda.cpp
|
||||
rm ../pair_lj_cut_coul_debye_cuda.cpp
|
||||
rm ../pair_lj_cut_coul_long_cuda.cpp
|
||||
rm ../pair_lj_cut_cuda.cpp
|
||||
rm ../pair_lj_cut_experimental_cuda.cpp
|
||||
rm ../pair_lj_expand_cuda.cpp
|
||||
rm ../pair_lj_gromacs_coul_gromacs_cuda.cpp
|
||||
rm ../pair_lj_gromacs_cuda.cpp
|
||||
rm ../pair_lj_smooth_cuda.cpp
|
||||
rm ../pair_morse_cuda.cpp
|
||||
rm ../pppm_cuda.cpp
|
||||
rm ../verlet_cuda.cpp
|
||||
|
||||
rm ../cuda.cpp
|
||||
rm ../cuda_neigh_list.cpp
|
||||
|
||||
rm ../atom_vec_angle_cuda.h
|
||||
rm ../atom_vec_atomic_cuda.h
|
||||
rm ../atom_vec_charge_cuda.h
|
||||
rm ../atom_vec_full_cuda.h
|
||||
rm ../comm_cuda.h
|
||||
rm ../compute_pe_cuda.h
|
||||
rm ../compute_pressure_cuda.h
|
||||
rm ../compute_temp_cuda.h
|
||||
rm ../compute_temp_partial_cuda.h
|
||||
rm ../domain_cuda.h
|
||||
rm ../fft3d_cuda.h
|
||||
rm ../fft3d_wrap_cuda.h
|
||||
rm ../fix_addforce_cuda.h
|
||||
rm ../fix_aveforce_cuda.h
|
||||
rm ../fix_enforce2d_cuda.h
|
||||
rm ../fix_freeze_cuda.h
|
||||
rm ../fix_gravity_cuda.h
|
||||
rm ../fix_nh_cuda.h
|
||||
rm ../fix_npt_cuda.h
|
||||
rm ../fix_nve_cuda.h
|
||||
rm ../fix_nvt_cuda.h
|
||||
rm ../fix_set_force_cuda.h
|
||||
rm ../fix_shake_cuda.h
|
||||
rm ../fix_temp_berendsen_cuda.h
|
||||
rm ../fix_temp_rescale_cuda.h
|
||||
rm ../fix_temp_rescale_limit_cuda.h
|
||||
rm ../fix_viscous_cuda.h
|
||||
rm ../modify_cuda.h
|
||||
rm ../neighbor_cuda.h
|
||||
rm ../pair_born_coul_long_cuda.h
|
||||
rm ../pair_buck_coul_cut_cuda.h
|
||||
rm ../pair_buck_coul_long_cuda.h
|
||||
rm ../pair_buck_cuda.h
|
||||
rm ../pair_cg_cmm_coul_cut_cuda.h
|
||||
rm ../pair_cg_cmm_coul_debye_cuda.h
|
||||
rm ../pair_cg_cmm_coul_long_cuda.h
|
||||
rm ../pair_cg_cmm_cuda.h
|
||||
rm ../pair_eam_alloy_cuda.h
|
||||
rm ../pair_eam_cuda.h
|
||||
rm ../pair_eam_fs_cuda.h
|
||||
rm ../pair_gran_hooke_cuda.h
|
||||
rm ../pair_lj96_cut_cuda.h
|
||||
rm ../pair_lj_charmm_coul_charmm_cuda.h
|
||||
rm ../pair_lj_charmm_coul_charmm_implicit_cuda.h
|
||||
rm ../pair_lj_charmm_coul_long_cuda.h
|
||||
rm ../pair_lj_class2_coul_cut_cuda.h
|
||||
rm ../pair_lj_class2_coul_long_cuda.h
|
||||
rm ../pair_lj_class2_cuda.h
|
||||
rm ../pair_lj_cut_coul_cut_cuda.h
|
||||
rm ../pair_lj_cut_coul_debye_cuda.h
|
||||
rm ../pair_lj_cut_coul_long_cuda.h
|
||||
rm ../pair_lj_cut_cuda.h
|
||||
rm ../pair_lj_cut_experimental_cuda.h
|
||||
rm ../pair_lj_expand_cuda.h
|
||||
rm ../pair_lj_gromacs_coul_gromacs_cuda.h
|
||||
rm ../pair_lj_gromacs_cuda.h
|
||||
rm ../pair_lj_smooth_cuda.h
|
||||
rm ../pair_morse_cuda.h
|
||||
rm ../pppm_cuda.h
|
||||
rm ../verlet_cuda.h
|
||||
|
||||
rm ../cuda.h
|
||||
|
|
|
@ -0,0 +1,476 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include "atom_vec_angle_cuda.h"
|
||||
#include "comm_cuda_cu.h"
|
||||
#include "atom_vec_angle_cuda_cu.h"
|
||||
#include "atom.h"
|
||||
#include "domain.h"
|
||||
#include "modify.h"
|
||||
#include "fix.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
#include "universe.h"
|
||||
#include "comm.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define DELTA 10000
|
||||
#define BUFFACTOR 1.5
|
||||
#define BUFEXTRA 1000
|
||||
#define NCUDAEXCHANGE 12 //nextra x y z vx vy vz tag type mask image molecule
|
||||
|
||||
#define BUF_FLOAT double
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Construct the CUDA flavour of the "angle" atom style.
// Forwards all arguments to AtomVecAngle, caches the Cuda handle stored in
// the LAMMPS instance and aborts through error->all() when that handle is
// missing. All exchange bookkeeping starts out empty; the copy lists are
// allocated later by grow_copylist().
AtomVecAngleCuda::AtomVecAngleCuda(LAMMPS *lmp, int narg, char **arg) :
  AtomVecAngle(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (!cuda)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  // copy-list state used by pack_exchange()/unpack_exchange()
  copylist = NULL;
  copylist2 = NULL;
  cu_copylist = NULL;
  max_nsend = 0;

  // the device-side exchange setup has not been run yet
  cuda_init_done = false;

  cudable = true;

  // maxsend is dereferenced by grow_send()/pack_exchange(); it is cleared here
  // and must be pointed at a valid counter before those are used
  maxsend = 0;
}
|
||||
|
||||
void AtomVecAngleCuda::grow_copylist(int new_max_nsend)
|
||||
{
|
||||
max_nsend=new_max_nsend;
|
||||
delete cu_copylist;
|
||||
delete [] copylist2;
|
||||
if(copylist) CudaWrapper_FreePinnedHostData((void*) copylist);
|
||||
copylist = (int*) CudaWrapper_AllocPinnedHostData(max_nsend*sizeof(int),false);
|
||||
copylist2 = new int[max_nsend];
|
||||
cu_copylist = new cCudaData<int, int, xx > (copylist, max_nsend);
|
||||
}
|
||||
|
||||
void AtomVecAngleCuda::grow_send(int n,double** buf_send,int flag) //need to be able to grow the comm send_buffer since the array sahll be copied from the gpu in whole
|
||||
{
|
||||
int old_maxsend=*maxsend+BUFEXTRA;
|
||||
*maxsend = static_cast<int> (BUFFACTOR * n);
|
||||
if (flag)
|
||||
{
|
||||
if(cuda->pinned)
|
||||
{
|
||||
double* tmp = new double[old_maxsend];
|
||||
memcpy((void*) tmp,(void*) *buf_send,old_maxsend*sizeof(double));
|
||||
if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
|
||||
*buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false);
|
||||
memcpy(*buf_send,tmp,old_maxsend*sizeof(double));
|
||||
delete [] tmp;
|
||||
}
|
||||
else
|
||||
{
|
||||
*buf_send = (double *)
|
||||
memory->srealloc(*buf_send,(*maxsend+BUFEXTRA)*sizeof(double),
|
||||
"comm:buf_send");
|
||||
}
|
||||
}
|
||||
else {
|
||||
if(cuda->pinned)
|
||||
{
|
||||
if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
|
||||
*buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false);
|
||||
}
|
||||
else
|
||||
{
|
||||
memory->sfree(*buf_send);
|
||||
*buf_send = (double *) memory->smalloc((*maxsend+BUFEXTRA)*sizeof(double),
|
||||
"comm:buf_send");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void AtomVecAngleCuda::grow_both(int n)
|
||||
{
|
||||
if(cuda->finished_setup)
|
||||
cuda->downloadAll();
|
||||
AtomVecAngle::grow(n);
|
||||
if(cuda->finished_setup)
|
||||
{
|
||||
cuda->checkResize();
|
||||
cuda->uploadAll();
|
||||
}
|
||||
}
|
||||
|
||||
int AtomVecAngleCuda::pack_comm(int n, int* iswap, double *buf,
|
||||
int pbc_flag, int *pbc) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
return AtomVecAngle::pack_comm(n,iswap,buf,pbc_flag,pbc);
|
||||
|
||||
int m = Cuda_CommCuda_PackComm(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
|
||||
if((sizeof(X_FLOAT)!=sizeof(double)) && m)
|
||||
m=(m+1)*sizeof(X_FLOAT)/sizeof(double);
|
||||
return m;
|
||||
}
|
||||
|
||||
int AtomVecAngleCuda::pack_comm_vel(int n, int* iswap, double *buf,
|
||||
int pbc_flag, int *pbc) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
return AtomVecAngle::pack_comm_vel(n,iswap,buf,pbc_flag,pbc);
|
||||
|
||||
int m = Cuda_CommCuda_PackCommVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
|
||||
if((sizeof(X_FLOAT)!=sizeof(double)) && m)
|
||||
m=(m+1)*sizeof(X_FLOAT)/sizeof(double);
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecAngleCuda::unpack_comm(int n, int first, double *buf) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
{AtomVecAngle::unpack_comm(n,first,buf); return;}
|
||||
|
||||
Cuda_CommCuda_UnpackComm(&cuda->shared_data,n,first,(void*)buf);
|
||||
}
|
||||
|
||||
void AtomVecAngleCuda::unpack_comm_vel(int n, int first, double *buf) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
{AtomVecAngle::unpack_comm_vel(n,first,buf); return;}
|
||||
|
||||
Cuda_CommCuda_UnpackCommVel(&cuda->shared_data,n,first,(void*)buf);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecAngleCuda::pack_reverse(int n, int first, double *buf) //usually this should not be called since comm->communicate handles the communication if only forces are exchanged
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
return AtomVecAngle::pack_reverse(n,first,buf);
|
||||
|
||||
int i,m,last;
|
||||
cuda->cu_f->download();
|
||||
m = 0;
|
||||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
buf[m++] = f[i][0];
|
||||
buf[m++] = f[i][1];
|
||||
buf[m++] = f[i][2];
|
||||
}
|
||||
cuda->cu_f->upload();
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecAngleCuda::unpack_reverse(int n, int *list, double *buf)//usually this should not be called since comm->communicate handles the communication if only forces are exchanged
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
{AtomVecAngle::unpack_reverse(n,list,buf); return;}
|
||||
|
||||
int i,j,m;
|
||||
|
||||
m = 0;
|
||||
cuda->cu_f->download();
|
||||
for (i = 0; i < n; i++) {
|
||||
j = list[i];
|
||||
f[j][0] += buf[m++];
|
||||
f[j][1] += buf[m++];
|
||||
f[j][2] += buf[m++];
|
||||
}
|
||||
cuda->cu_f->upload();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecAngleCuda::pack_border(int n, int *iswap, double *buf,
|
||||
int pbc_flag, int *pbc)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
return AtomVecAngle::pack_border(n,iswap,buf,pbc_flag,pbc);
|
||||
|
||||
int m = Cuda_AtomVecAngleCuda_PackBorder(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
|
||||
return m;
|
||||
}
|
||||
|
||||
int AtomVecAngleCuda::pack_border_vel(int n, int *iswap, double *buf,
|
||||
int pbc_flag, int *pbc)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
return AtomVecAngle::pack_border_vel(n,iswap,buf,pbc_flag,pbc);
|
||||
|
||||
int m = Cuda_AtomVecAngleCuda_PackBorderVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
|
||||
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecAngleCuda::unpack_border(int n, int first, double *buf)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
{AtomVecAngle::unpack_border(n,first,buf); return;}
|
||||
while(atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax) //ensure there is enough space on device to unpack data
|
||||
{
|
||||
grow_both(0);
|
||||
}
|
||||
int flag=Cuda_AtomVecAngleCuda_UnpackBorder(&cuda->shared_data,n,first,(void*)buf);
|
||||
if(flag) {printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");}
|
||||
}
|
||||
|
||||
void AtomVecAngleCuda::unpack_border_vel(int n, int first, double *buf)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
{AtomVecAngle::unpack_border_vel(n,first,buf); return;}
|
||||
while(atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax) //ensure there is enough space on device to unpack data
|
||||
{
|
||||
grow_both(0);
|
||||
}
|
||||
int flag=Cuda_AtomVecAngleCuda_UnpackBorderVel(&cuda->shared_data,n,first,(void*)buf);
|
||||
if(flag) {printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack data for atom I for sending to another proc
|
||||
xyz must be 1st 3 values, so comm::exchange() can test on them
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
|
||||
int AtomVecAngleCuda::pack_exchange(int dim, double *buf)
|
||||
{
|
||||
if(cuda->oncpu)
|
||||
return AtomVecAngle::pack_exchange(dim,buf);
|
||||
|
||||
if(not cuda_init_done||domain->box_change)
|
||||
{
|
||||
Cuda_AtomVecAngleCuda_Init(&cuda->shared_data);
|
||||
cuda_init_done=true;
|
||||
}
|
||||
double** buf_pointer=(double**) buf;
|
||||
if(*maxsend<atom->nghost || *buf_pointer==NULL)
|
||||
{
|
||||
grow_send(atom->nghost>*maxsend?atom->nghost:*maxsend,buf_pointer,0);
|
||||
*maxsend=atom->nghost>*maxsend?atom->nghost:*maxsend;
|
||||
}
|
||||
|
||||
if(max_nsend==0) grow_copylist(200);
|
||||
|
||||
int nsend_atoms = Cuda_AtomVecAngleCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
|
||||
|
||||
if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100);
|
||||
if(nsend_atoms*NCUDAEXCHANGE>*maxsend)
|
||||
{
|
||||
grow_send((int) (nsend_atoms+100)*NCUDAEXCHANGE,buf_pointer,0);
|
||||
Cuda_AtomVecAngleCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
|
||||
}
|
||||
|
||||
int nlocal=atom->nlocal-nsend_atoms;
|
||||
|
||||
for(int i=0;i<nsend_atoms;i++) copylist2[i]=1;
|
||||
for(int j=1;j<nsend_atoms+1;j++)
|
||||
{
|
||||
int i = static_cast <int> ((*buf_pointer)[j]);
|
||||
if(i>=nlocal) copylist2[i-nlocal]=-1;
|
||||
}
|
||||
|
||||
int actpos=0;
|
||||
for(int j=1;j<nsend_atoms+1;j++)
|
||||
{
|
||||
int i = static_cast <int> ((*buf_pointer)[j]);
|
||||
if(i<nlocal)
|
||||
{
|
||||
while(copylist2[actpos]==-1) actpos++;
|
||||
copylist[j-1]=nlocal+actpos;
|
||||
actpos++;
|
||||
}
|
||||
}
|
||||
cu_copylist->upload();
|
||||
|
||||
cuda->shared_data.atom.nlocal=nlocal;
|
||||
|
||||
int m = Cuda_AtomVecAngleCuda_PackExchange(&cuda->shared_data,nsend_atoms,*buf_pointer,cu_copylist->dev_data());
|
||||
|
||||
timespec time1,time2;
|
||||
clock_gettime(CLOCK_REALTIME,&time1);
|
||||
|
||||
double* buf_p=*buf_pointer;
|
||||
for(int j=0;j<nsend_atoms;j++)
|
||||
{
|
||||
int i=static_cast <int> (buf_p[j+1]);
|
||||
int nextra=0;
|
||||
int k;
|
||||
buf_p[m++] = num_bond[i];
|
||||
for (k = 0; k < num_bond[i]; k++) {
|
||||
buf_p[m++] = bond_type[i][k];
|
||||
buf_p[m++] = bond_atom[i][k];
|
||||
}
|
||||
nextra+=2*num_bond[i]+1;
|
||||
if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
|
||||
|
||||
buf_p[m++] = num_angle[i];
|
||||
for (k = 0; k < num_angle[i]; k++) {
|
||||
buf_p[m++] = angle_type[i][k];
|
||||
buf_p[m++] = angle_atom1[i][k];
|
||||
buf_p[m++] = angle_atom2[i][k];
|
||||
buf_p[m++] = angle_atom3[i][k];
|
||||
}
|
||||
nextra+=4*num_angle[i]+1;
|
||||
if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
|
||||
|
||||
buf_p[m++] = nspecial[i][0];
|
||||
buf_p[m++] = nspecial[i][1];
|
||||
buf_p[m++] = nspecial[i][2];
|
||||
for (k = 0; k < nspecial[i][2]; k++) buf_p[m++] = special[i][k];
|
||||
nextra+=nspecial[i][2]+3;
|
||||
if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
|
||||
|
||||
if (atom->nextra_grow)
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
{
|
||||
int dm= modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf_p[m]);
|
||||
m+=dm;
|
||||
nextra+=dm;
|
||||
if(i<nlocal)modify->fix[atom->extra_grow[iextra]]->copy_arrays(copylist[j],i);
|
||||
if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
|
||||
}
|
||||
|
||||
if(i<nlocal)AtomVecAngle::copy(copylist[j],i,1);
|
||||
(*buf_pointer)[j+1] = nextra;
|
||||
}
|
||||
|
||||
clock_gettime(CLOCK_REALTIME,&time2);
|
||||
cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+=
|
||||
time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
|
||||
|
||||
(*buf_pointer)[0] = nsend_atoms;
|
||||
atom->nlocal-=nsend_atoms;
|
||||
cuda->shared_data.atom.update_nlocal=2;
|
||||
//printf("End Pack Exchange\n");
|
||||
if(m==1) return 0;
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecAngleCuda::unpack_exchange(double *buf)
|
||||
{
|
||||
// printf("Begin UnPack Exchange\n");
|
||||
if(cuda->oncpu)
|
||||
return AtomVecAngle::unpack_exchange(buf);
|
||||
|
||||
double *sublo,*subhi;
|
||||
int dim=cuda->shared_data.exchange_dim;
|
||||
if(domain->box_change)
|
||||
Cuda_AtomVecAngleCuda_Init(&cuda->shared_data);
|
||||
if (domain->triclinic == 0) {
|
||||
sublo = domain->sublo;
|
||||
subhi = domain->subhi;
|
||||
} else {
|
||||
sublo = domain->sublo_lamda;
|
||||
subhi = domain->subhi_lamda;
|
||||
}
|
||||
|
||||
int mfirst=0;
|
||||
for(int pi=0;pi<(comm->procgrid[dim]>2?2:1);pi++)
|
||||
{
|
||||
int nlocal = atom->nlocal;
|
||||
int nsend_atoms=static_cast<int> (buf[0]);
|
||||
if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100);
|
||||
|
||||
if (nlocal+nsend_atoms+atom->nghost>=atom->nmax) grow_both(nlocal+nsend_atoms*2+atom->nghost); //ensure there is enough space on device to unpack data
|
||||
int naccept = Cuda_AtomVecAngleCuda_UnpackExchange(&cuda->shared_data,nsend_atoms,buf,cu_copylist->dev_data());
|
||||
cu_copylist->download();
|
||||
int m = nsend_atoms*NCUDAEXCHANGE + 1;
|
||||
nlocal+=naccept;
|
||||
|
||||
timespec time1,time2;
|
||||
clock_gettime(CLOCK_REALTIME,&time1);
|
||||
|
||||
for(int j=0;j<nsend_atoms;j++)
|
||||
{
|
||||
if(copylist[j]>-1)
|
||||
{
|
||||
int k;
|
||||
int i=copylist[j];
|
||||
num_bond[i] = static_cast<int> (buf[m++]);
|
||||
for (k = 0; k < num_bond[i]; k++) {
|
||||
bond_type[i][k] = static_cast<int> (buf[m++]);
|
||||
bond_atom[i][k] = static_cast<int> (buf[m++]);
|
||||
}
|
||||
|
||||
num_angle[i] = static_cast<int> (buf[m++]);
|
||||
for (k = 0; k < num_angle[i]; k++) {
|
||||
angle_type[i][k] = static_cast<int> (buf[m++]);
|
||||
angle_atom1[i][k] = static_cast<int> (buf[m++]);
|
||||
angle_atom2[i][k] = static_cast<int> (buf[m++]);
|
||||
angle_atom3[i][k] = static_cast<int> (buf[m++]);
|
||||
}
|
||||
|
||||
nspecial[i][0] = static_cast<int> (buf[m++]);
|
||||
nspecial[i][1] = static_cast<int> (buf[m++]);
|
||||
nspecial[i][2] = static_cast<int> (buf[m++]);
|
||||
for (k = 0; k < nspecial[i][2]; k++)
|
||||
special[i][k] = static_cast<int> (buf[m++]);
|
||||
|
||||
if (atom->nextra_grow)
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
m += modify->fix[atom->extra_grow[iextra]]->
|
||||
unpack_exchange(i,&buf[m]);
|
||||
|
||||
}
|
||||
else
|
||||
m+=static_cast <int> (buf[j+1]);
|
||||
}
|
||||
|
||||
clock_gettime(CLOCK_REALTIME,&time2);
|
||||
cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+=
|
||||
time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
|
||||
|
||||
cuda->shared_data.atom.nlocal=nlocal;
|
||||
cuda->shared_data.atom.update_nlocal=2;
|
||||
atom->nlocal=nlocal;
|
||||
mfirst+=m;
|
||||
buf=&buf[m];
|
||||
}
|
||||
return mfirst;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef ATOM_CLASS
|
||||
|
||||
AtomStyle(angle/cuda,AtomVecAngleCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_ATOM_VEC_ANGLE_CUDA_H
|
||||
#define LMP_ATOM_VEC_ANGLE_CUDA_H
|
||||
|
||||
#include "atom_vec_angle.h"
|
||||
#include "cuda_data.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class AtomVecAngleCuda : public AtomVecAngle {
|
||||
public:
|
||||
AtomVecAngleCuda(class LAMMPS *, int, char **);
|
||||
virtual ~AtomVecAngleCuda() {}
|
||||
void grow_copylist(int n);
|
||||
void grow_send(int n,double** buf_send,int flag);
|
||||
void grow_both(int n);
|
||||
int pack_comm(int, int *, double *, int, int *);
|
||||
int pack_comm_vel(int, int *, double *, int, int *);
|
||||
void unpack_comm(int, int, double *);
|
||||
void unpack_comm_vel(int, int, double *);
|
||||
int pack_reverse(int, int, double *);
|
||||
void unpack_reverse(int, int *, double *);
|
||||
int pack_border(int, int *, double *, int, int *);
|
||||
int pack_border_vel(int, int *, double *, int, int *);
|
||||
void unpack_border(int, int, double *);
|
||||
void unpack_border_vel(int, int, double *);
|
||||
int pack_exchange(int, double *);
|
||||
int unpack_exchange(double *);
|
||||
private:
|
||||
class Cuda *cuda;
|
||||
bool cuda_init_done;
|
||||
int* copylist;
|
||||
int* copylist2;
|
||||
cCudaData<int, int, xx >* cu_copylist;
|
||||
int max_nsend;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,407 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include "atom_vec_atomic_cuda.h"
|
||||
#include "comm_cuda_cu.h"
|
||||
#include "atom_vec_atomic_cuda_cu.h"
|
||||
#include "atom.h"
|
||||
#include "domain.h"
|
||||
#include "modify.h"
|
||||
#include "fix.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
#include "comm.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define DELTA 10000
|
||||
#define BUFFACTOR 1.5
|
||||
#define BUFEXTRA 1000
|
||||
#define NCUDAEXCHANGE 11 //nextra x y z vx vy vz tag type mask image
|
||||
|
||||
|
||||
#define BUF_FLOAT double
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Build the CUDA flavour of the atomic style on top of AtomVecAtomic.
// Reports an error through error->all() when the LAMMPS instance carries no
// Cuda object; otherwise all exchange bookkeeping starts out empty.
AtomVecAtomicCuda::AtomVecAtomicCuda(LAMMPS *lmp, int narg, char **arg) :
  AtomVecAtomic(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (cuda == NULL) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  // copy lists are allocated lazily by grow_copylist()
  copylist = NULL;
  copylist2 = NULL;
  cu_copylist = NULL;
  max_nsend = 0;

  // device-side initialisation is deferred to the first pack_exchange()
  cuda_init_done = false;

  // maxsend is dereferenced elsewhere in this class, so this clears a pointer
  maxsend = 0;
  cudable = true;
}
|
||||
|
||||
void AtomVecAtomicCuda::grow_copylist(int new_max_nsend)
|
||||
{
|
||||
max_nsend=new_max_nsend;
|
||||
delete cu_copylist;
|
||||
delete [] copylist2;
|
||||
if(copylist) CudaWrapper_FreePinnedHostData((void*) copylist);
|
||||
copylist = (int*) CudaWrapper_AllocPinnedHostData(max_nsend*sizeof(int),false);
|
||||
copylist2 = new int[max_nsend];
|
||||
cu_copylist = new cCudaData<int, int, xx > (copylist, max_nsend);
|
||||
}
|
||||
|
||||
void AtomVecAtomicCuda::grow_send(int n,double** buf_send,int flag)
|
||||
{
|
||||
int old_maxsend=*maxsend+BUFEXTRA;
|
||||
*maxsend = static_cast<int> (BUFFACTOR * n);
|
||||
if (flag)
|
||||
{
|
||||
if(cuda->pinned)
|
||||
{
|
||||
double* tmp = new double[old_maxsend];
|
||||
memcpy((void*) tmp,(void*) *buf_send,old_maxsend*sizeof(double));
|
||||
if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
|
||||
*buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false);
|
||||
memcpy(*buf_send,tmp,old_maxsend*sizeof(double));
|
||||
delete [] tmp;
|
||||
}
|
||||
else
|
||||
{
|
||||
*buf_send = (double *)
|
||||
memory->srealloc(*buf_send,(*maxsend+BUFEXTRA)*sizeof(double),
|
||||
"comm:buf_send");
|
||||
}
|
||||
}
|
||||
else {
|
||||
if(cuda->pinned)
|
||||
{
|
||||
if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
|
||||
*buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false);
|
||||
}
|
||||
else
|
||||
{
|
||||
memory->sfree(*buf_send);
|
||||
*buf_send = (double *) memory->smalloc((*maxsend+BUFEXTRA)*sizeof(double),
|
||||
"comm:buf_send");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void AtomVecAtomicCuda::grow_both(int n)
|
||||
{
|
||||
if(cuda->finished_setup)
|
||||
cuda->downloadAll();
|
||||
AtomVecAtomic::grow(n);
|
||||
if(cuda->finished_setup)
|
||||
{
|
||||
cuda->checkResize();
|
||||
cuda->uploadAll();
|
||||
}
|
||||
}
|
||||
|
||||
int AtomVecAtomicCuda::pack_comm(int n, int* iswap, double *buf,
|
||||
int pbc_flag, int *pbc)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
return AtomVecAtomic::pack_comm(n,iswap,buf,pbc_flag,pbc);
|
||||
|
||||
int m = Cuda_CommCuda_PackComm(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
|
||||
if((sizeof(X_FLOAT)!=sizeof(double)) && m)
|
||||
m=(m+1)*sizeof(X_FLOAT)/sizeof(double);
|
||||
return m;
|
||||
}
|
||||
|
||||
int AtomVecAtomicCuda::pack_comm_vel(int n, int* iswap, double *buf,
|
||||
int pbc_flag, int *pbc)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
return AtomVecAtomic::pack_comm_vel(n,iswap,buf,pbc_flag,pbc);
|
||||
|
||||
int m = Cuda_CommCuda_PackCommVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
|
||||
if((sizeof(X_FLOAT)!=sizeof(double)) && m)
|
||||
m=(m+1)*sizeof(X_FLOAT)/sizeof(double);
|
||||
return m;
|
||||
}
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecAtomicCuda::unpack_comm(int n, int first, double *buf)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
{AtomVecAtomic::unpack_comm(n,first,buf); return;}
|
||||
|
||||
Cuda_CommCuda_UnpackComm(&cuda->shared_data,n,first,(void*)buf);
|
||||
}
|
||||
|
||||
void AtomVecAtomicCuda::unpack_comm_vel(int n, int first, double *buf)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
{AtomVecAtomic::unpack_comm_vel(n,first,buf); return;}
|
||||
|
||||
Cuda_CommCuda_UnpackCommVel(&cuda->shared_data,n,first,(void*)buf);
|
||||
}
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecAtomicCuda::pack_reverse(int n, int first, double *buf)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
return AtomVecAtomic::pack_reverse(n,first,buf);
|
||||
|
||||
int i,m,last;
|
||||
|
||||
m = 0;
|
||||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
buf[m++] = f[i][0];
|
||||
buf[m++] = f[i][1];
|
||||
buf[m++] = f[i][2];
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecAtomicCuda::unpack_reverse(int n, int *list, double *buf)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
{AtomVecAtomic::unpack_reverse(n,list,buf); return;}
|
||||
|
||||
int i,j,m;
|
||||
|
||||
m = 0;
|
||||
for (i = 0; i < n; i++) {
|
||||
j = list[i];
|
||||
f[j][0] += buf[m++];
|
||||
f[j][1] += buf[m++];
|
||||
f[j][2] += buf[m++];
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecAtomicCuda::pack_border(int n, int *iswap, double *buf,
|
||||
int pbc_flag, int *pbc)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
return AtomVecAtomic::pack_border(n,iswap,buf,pbc_flag,pbc);
|
||||
|
||||
int m = Cuda_AtomVecAtomicCuda_PackBorder(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
|
||||
|
||||
return m;
|
||||
}
|
||||
|
||||
int AtomVecAtomicCuda::pack_border_vel(int n, int *iswap, double *buf,
|
||||
int pbc_flag, int *pbc)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
return AtomVecAtomic::pack_border_vel(n,iswap,buf,pbc_flag,pbc);
|
||||
|
||||
int m = Cuda_AtomVecAtomicCuda_PackBorderVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
|
||||
|
||||
return m;
|
||||
}
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecAtomicCuda::unpack_border(int n, int first, double *buf)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
{AtomVecAtomic::unpack_border(n,first,buf); return;}
|
||||
while(atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax)
|
||||
{
|
||||
grow_both(0);
|
||||
}
|
||||
int flag=Cuda_AtomVecAtomicCuda_UnpackBorder(&cuda->shared_data,n,first,(void*)buf);
|
||||
if(flag) {printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");}
|
||||
|
||||
}
|
||||
|
||||
void AtomVecAtomicCuda::unpack_border_vel(int n, int first, double *buf)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
{AtomVecAtomic::unpack_border_vel(n,first,buf); return;}
|
||||
while(atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax)
|
||||
{
|
||||
grow_both(0);
|
||||
}
|
||||
int flag=Cuda_AtomVecAtomicCuda_UnpackBorderVel(&cuda->shared_data,n,first,(void*)buf);
|
||||
if(flag) {printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");}
|
||||
}
|
||||
/* ----------------------------------------------------------------------
|
||||
pack data for atom I for sending to another proc
|
||||
xyz must be 1st 3 values, so comm::exchange() can test on them
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
|
||||
int AtomVecAtomicCuda::pack_exchange(int dim, double *buf)
|
||||
{
|
||||
if(cuda->oncpu)
|
||||
return AtomVecAtomic::pack_exchange(dim,buf);
|
||||
|
||||
if(not cuda_init_done||domain->box_change)
|
||||
{
|
||||
Cuda_AtomVecAtomicCuda_Init(&cuda->shared_data);
|
||||
cuda_init_done=true;
|
||||
}
|
||||
double** buf_pointer=(double**) buf;
|
||||
if(*maxsend<atom->nghost || *buf_pointer==NULL)
|
||||
{
|
||||
grow_send(atom->nghost>*maxsend?atom->nghost:*maxsend,buf_pointer,0);
|
||||
*maxsend=atom->nghost>*maxsend?atom->nghost:*maxsend;
|
||||
}
|
||||
|
||||
if(max_nsend==0) grow_copylist(200);
|
||||
|
||||
int nsend_atoms = Cuda_AtomVecAtomicCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
|
||||
|
||||
if(nsend_atoms>max_nsend) {grow_copylist(nsend_atoms+100);}
|
||||
if(nsend_atoms*NCUDAEXCHANGE>*maxsend)
|
||||
{
|
||||
grow_send((int) (nsend_atoms+100)*NCUDAEXCHANGE,buf_pointer,0);
|
||||
Cuda_AtomVecAtomicCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
|
||||
}
|
||||
|
||||
int nlocal=atom->nlocal-nsend_atoms;
|
||||
|
||||
for(int i=0;i<nsend_atoms;i++) copylist2[i]=1;
|
||||
for(int j=1;j<nsend_atoms+1;j++)
|
||||
{
|
||||
int i = static_cast <int> ((*buf_pointer)[j]);
|
||||
if(i>=nlocal) copylist2[i-nlocal]=-1;
|
||||
}
|
||||
|
||||
int actpos=0;
|
||||
for(int j=1;j<nsend_atoms+1;j++)
|
||||
{
|
||||
int i = static_cast <int> ((*buf_pointer)[j]);
|
||||
if(i<nlocal)
|
||||
{
|
||||
while(copylist2[actpos]==-1) actpos++;
|
||||
copylist[j-1]=nlocal+actpos;
|
||||
actpos++;
|
||||
}
|
||||
}
|
||||
cu_copylist->upload();
|
||||
|
||||
cuda->shared_data.atom.nlocal=nlocal;
|
||||
|
||||
int m = Cuda_AtomVecAtomicCuda_PackExchange(&cuda->shared_data,nsend_atoms,*buf_pointer,cu_copylist->dev_data());
|
||||
if (atom->nextra_grow)
|
||||
for(int j=0;j<nsend_atoms;j++)
|
||||
{
|
||||
int i=static_cast <int> ((*buf_pointer)[j+1]);
|
||||
int nextra=0;
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++) {
|
||||
|
||||
int dm = modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&((*buf_pointer)[m]));
|
||||
m+=dm;
|
||||
nextra+=dm;
|
||||
if(i<nlocal)modify->fix[atom->extra_grow[iextra]]->copy_arrays(copylist[j],i);
|
||||
if(m>*maxsend) grow_send(m,buf_pointer,1);
|
||||
}
|
||||
(*buf_pointer)[j+1] = nextra;
|
||||
|
||||
}
|
||||
|
||||
(*buf_pointer)[0] = nsend_atoms;
|
||||
atom->nlocal-=nsend_atoms;
|
||||
cuda->shared_data.atom.update_nlocal=2;
|
||||
|
||||
if(m==1) return 0;//m is at least 1 in cuda since buf[0] contains number of atoms
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecAtomicCuda::unpack_exchange(double *buf)
|
||||
{
|
||||
//printf("Unpack Begin\n");
|
||||
if(cuda->oncpu)
|
||||
return AtomVecAtomic::unpack_exchange(buf);
|
||||
|
||||
double *sublo,*subhi;
|
||||
|
||||
int dim=cuda->shared_data.exchange_dim;
|
||||
if(domain->box_change)
|
||||
Cuda_AtomVecAtomicCuda_Init(&cuda->shared_data);
|
||||
if (domain->triclinic == 0) {
|
||||
sublo = domain->sublo;
|
||||
subhi = domain->subhi;
|
||||
} else {
|
||||
sublo = domain->sublo_lamda;
|
||||
subhi = domain->subhi_lamda;
|
||||
}
|
||||
|
||||
int mfirst=0;
|
||||
for(int pi=0;pi<(comm->procgrid[dim]>2?2:1);pi++)
|
||||
{
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
int nsend_atoms=static_cast<int> (buf[0]);
|
||||
if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100);
|
||||
|
||||
if (nlocal+nsend_atoms+atom->nghost>=atom->nmax) grow_both(nlocal+nsend_atoms*2+atom->nghost);
|
||||
int naccept = Cuda_AtomVecAtomicCuda_UnpackExchange(&cuda->shared_data,nsend_atoms,buf,cu_copylist->dev_data());
|
||||
cu_copylist->download();
|
||||
int m = nsend_atoms*NCUDAEXCHANGE + 1;
|
||||
nlocal+=naccept;
|
||||
if (atom->nextra_grow)
|
||||
for(int j=0;j<nsend_atoms;j++)
|
||||
{
|
||||
if(copylist[j]>-1)
|
||||
{
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
m += modify->fix[atom->extra_grow[iextra]]->
|
||||
unpack_exchange(copylist[j],&buf[m]);
|
||||
}
|
||||
else
|
||||
{
|
||||
m+=static_cast <int> (buf[j+1]);
|
||||
}
|
||||
}
|
||||
cuda->shared_data.atom.nlocal=nlocal;
|
||||
cuda->shared_data.atom.update_nlocal=2;
|
||||
atom->nlocal=nlocal;
|
||||
mfirst+=m;
|
||||
buf=&buf[m];
|
||||
}
|
||||
return mfirst;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
#ifdef ATOM_CLASS
|
||||
|
||||
AtomStyle(atomic/cuda,AtomVecAtomicCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_ATOM_VEC_ATOMIC_CUDA_H
|
||||
#define LMP_ATOM_VEC_ATOMIC_CUDA_H
|
||||
|
||||
#include "atom_vec_atomic.h"
|
||||
#include "cuda_data.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class AtomVecAtomicCuda : public AtomVecAtomic {
|
||||
public:
|
||||
AtomVecAtomicCuda(class LAMMPS *, int, char **);
|
||||
virtual ~AtomVecAtomicCuda() {}
|
||||
void grow_copylist(int n);
|
||||
void grow_send(int n,double** buf_send,int flag);
|
||||
void grow_both(int n);
|
||||
int pack_comm(int, int *, double *, int, int *);
|
||||
int pack_comm_vel(int, int *, double *, int, int *);
|
||||
void unpack_comm(int, int, double *);
|
||||
void unpack_comm_vel(int, int, double *);
|
||||
int pack_reverse(int, int, double *);
|
||||
void unpack_reverse(int, int *, double *);
|
||||
int pack_border(int, int *, double *, int, int *);
|
||||
int pack_border_vel(int, int *, double *, int, int *);
|
||||
void unpack_border(int, int, double *);
|
||||
void unpack_border_vel(int, int, double *);
|
||||
int pack_exchange(int, double *);
|
||||
int unpack_exchange(double *);
|
||||
private:
|
||||
class Cuda *cuda;
|
||||
bool cuda_init_done;
|
||||
int* copylist;
|
||||
int* copylist2;
|
||||
cCudaData<int, int, xx >* cu_copylist;
|
||||
int max_nsend;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,407 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include "atom_vec_charge_cuda.h"
|
||||
#include "comm_cuda_cu.h"
|
||||
#include "atom_vec_charge_cuda_cu.h"
|
||||
#include "atom.h"
|
||||
#include "domain.h"
|
||||
#include "modify.h"
|
||||
#include "fix.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
#include "comm.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define DELTA 10000
|
||||
#define BUFFACTOR 1.5
|
||||
#define BUFEXTRA 1000
|
||||
#define NCUDAEXCHANGE 12 //nextra x y z vx vy vz tag type mask image q
|
||||
|
||||
#define BUF_FLOAT double
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Build the CUDA flavour of the charge style on top of AtomVecCharge.
// Reports an error through error->all() when the LAMMPS instance carries no
// Cuda object; otherwise all exchange bookkeeping starts out empty.
AtomVecChargeCuda::AtomVecChargeCuda(LAMMPS *lmp, int narg, char **arg) :
  AtomVecCharge(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (cuda == NULL) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  // copy lists are allocated lazily by grow_copylist()
  copylist = NULL;
  copylist2 = NULL;
  cu_copylist = NULL;
  max_nsend = 0;

  // device-side initialisation is deferred to the first pack_exchange()
  cuda_init_done = false;

  // maxsend is dereferenced elsewhere in this class, so this clears a pointer
  maxsend = 0;
  cudable = true;
}
|
||||
|
||||
void AtomVecChargeCuda::grow_copylist(int new_max_nsend)
|
||||
{
|
||||
max_nsend=new_max_nsend;
|
||||
delete cu_copylist;
|
||||
delete [] copylist2;
|
||||
if(copylist) CudaWrapper_FreePinnedHostData((void*) copylist);
|
||||
copylist = (int*) CudaWrapper_AllocPinnedHostData(max_nsend*sizeof(int),false);
|
||||
copylist2 = new int[max_nsend];
|
||||
cu_copylist = new cCudaData<int, int, xx > (copylist, max_nsend);
|
||||
}
|
||||
|
||||
void AtomVecChargeCuda::grow_send(int n,double** buf_send,int flag) //need to be able to grow the comm send_buffer since the array sahll be copied from the gpu in whole
|
||||
{
|
||||
int old_maxsend=*maxsend+BUFEXTRA;
|
||||
*maxsend = static_cast<int> (BUFFACTOR * n);
|
||||
if (flag)
|
||||
{
|
||||
if(cuda->pinned)
|
||||
{
|
||||
double* tmp = new double[old_maxsend];
|
||||
memcpy((void*) tmp,(void*) *buf_send,old_maxsend*sizeof(double));
|
||||
if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
|
||||
*buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false);
|
||||
memcpy(*buf_send,tmp,old_maxsend*sizeof(double));
|
||||
delete [] tmp;
|
||||
}
|
||||
else
|
||||
{
|
||||
*buf_send = (double *)
|
||||
memory->srealloc(*buf_send,(*maxsend+BUFEXTRA)*sizeof(double),
|
||||
"comm:buf_send");
|
||||
}
|
||||
}
|
||||
else {
|
||||
if(cuda->pinned)
|
||||
{
|
||||
if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
|
||||
*buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false);
|
||||
}
|
||||
else
|
||||
{
|
||||
memory->sfree(*buf_send);
|
||||
*buf_send = (double *) memory->smalloc((*maxsend+BUFEXTRA)*sizeof(double),
|
||||
"comm:buf_send");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void AtomVecChargeCuda::grow_both(int n)
|
||||
{
|
||||
if(cuda->finished_setup)
|
||||
cuda->downloadAll();
|
||||
AtomVecCharge::grow(n);
|
||||
if(cuda->finished_setup)
|
||||
{
|
||||
cuda->checkResize();
|
||||
cuda->uploadAll();
|
||||
}
|
||||
}
|
||||
|
||||
int AtomVecChargeCuda::pack_comm(int n, int* iswap, double *buf,
|
||||
int pbc_flag, int *pbc) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
return AtomVecCharge::pack_comm(n,iswap,buf,pbc_flag,pbc);
|
||||
|
||||
int m = Cuda_CommCuda_PackComm(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
|
||||
if((sizeof(X_FLOAT)!=sizeof(double)) && m)
|
||||
m=(m+1)*sizeof(X_FLOAT)/sizeof(double);
|
||||
return m;
|
||||
}
|
||||
|
||||
int AtomVecChargeCuda::pack_comm_vel(int n, int* iswap, double *buf,
|
||||
int pbc_flag, int *pbc) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
return AtomVecCharge::pack_comm_vel(n,iswap,buf,pbc_flag,pbc);
|
||||
|
||||
int m = Cuda_CommCuda_PackCommVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
|
||||
if((sizeof(X_FLOAT)!=sizeof(double)) && m)
|
||||
m=(m+1)*sizeof(X_FLOAT)/sizeof(double);
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecChargeCuda::unpack_comm(int n, int first, double *buf) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
{AtomVecCharge::unpack_comm(n,first,buf); return;}
|
||||
|
||||
Cuda_CommCuda_UnpackComm(&cuda->shared_data,n,first,(void*)buf);
|
||||
}
|
||||
|
||||
void AtomVecChargeCuda::unpack_comm_vel(int n, int first, double *buf) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
{AtomVecCharge::unpack_comm_vel(n,first,buf); return;}
|
||||
|
||||
Cuda_CommCuda_UnpackCommVel(&cuda->shared_data,n,first,(void*)buf);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecChargeCuda::pack_reverse(int n, int first, double *buf) //usually this should not be called since comm->communicate handles the communication if only forces are exchanged
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
return AtomVecCharge::pack_reverse(n,first,buf);
|
||||
|
||||
int i,m,last;
|
||||
cuda->cu_f->download();
|
||||
m = 0;
|
||||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
buf[m++] = f[i][0];
|
||||
buf[m++] = f[i][1];
|
||||
buf[m++] = f[i][2];
|
||||
}
|
||||
cuda->cu_f->upload();
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecChargeCuda::unpack_reverse(int n, int *list, double *buf)//usually this should not be called since comm->communicate handles the communication if only forces are exchanged
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
{AtomVecCharge::unpack_reverse(n,list,buf); return;}
|
||||
|
||||
int i,j,m;
|
||||
|
||||
m = 0;
|
||||
cuda->cu_f->download();
|
||||
for (i = 0; i < n; i++) {
|
||||
j = list[i];
|
||||
f[j][0] += buf[m++];
|
||||
f[j][1] += buf[m++];
|
||||
f[j][2] += buf[m++];
|
||||
}
|
||||
cuda->cu_f->upload();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecChargeCuda::pack_border(int n, int *iswap, double *buf,
|
||||
int pbc_flag, int *pbc)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
return AtomVecCharge::pack_border(n,iswap,buf,pbc_flag,pbc);
|
||||
|
||||
int m = Cuda_AtomVecChargeCuda_PackBorder(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
|
||||
|
||||
return m;
|
||||
}
|
||||
|
||||
int AtomVecChargeCuda::pack_border_vel(int n, int *iswap, double *buf,
|
||||
int pbc_flag, int *pbc)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
return AtomVecCharge::pack_border_vel(n,iswap,buf,pbc_flag,pbc);
|
||||
|
||||
int m = Cuda_AtomVecChargeCuda_PackBorderVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
|
||||
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecChargeCuda::unpack_border(int n, int first, double *buf)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
{AtomVecCharge::unpack_border(n,first,buf); return;}
|
||||
while(atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax) //ensure there is enough space on device to unpack data
|
||||
{
|
||||
grow_both(0);
|
||||
}
|
||||
int flag=Cuda_AtomVecChargeCuda_UnpackBorder(&cuda->shared_data,n,first,(void*)buf);
|
||||
if(flag) {printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");}
|
||||
}
|
||||
|
||||
void AtomVecChargeCuda::unpack_border_vel(int n, int first, double *buf)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
{AtomVecCharge::unpack_border_vel(n,first,buf); return;}
|
||||
while(atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax) //ensure there is enough space on device to unpack data
|
||||
{
|
||||
grow_both(0);
|
||||
}
|
||||
int flag=Cuda_AtomVecChargeCuda_UnpackBorderVel(&cuda->shared_data,n,first,(void*)buf);
|
||||
if(flag) {printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack data for atom I for sending to another proc
|
||||
xyz must be 1st 3 values, so comm::exchange() can test on them
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
|
||||
int AtomVecChargeCuda::pack_exchange(int dim, double *buf)
|
||||
{
|
||||
if(cuda->oncpu)
|
||||
return AtomVecCharge::pack_exchange(dim,buf);
|
||||
|
||||
if(not cuda_init_done||domain->box_change)
|
||||
{
|
||||
Cuda_AtomVecChargeCuda_Init(&cuda->shared_data);
|
||||
cuda_init_done=true;
|
||||
}
|
||||
double** buf_pointer=(double**) buf;
|
||||
if(*maxsend<atom->nghost || *buf_pointer==NULL)
|
||||
{
|
||||
grow_send(atom->nghost>*maxsend?atom->nghost:*maxsend,buf_pointer,0);
|
||||
*maxsend=atom->nghost>*maxsend?atom->nghost:*maxsend;
|
||||
}
|
||||
|
||||
if(max_nsend==0) grow_copylist(200);
|
||||
|
||||
int nsend_atoms = Cuda_AtomVecChargeCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
|
||||
|
||||
if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100);
|
||||
if(nsend_atoms*NCUDAEXCHANGE>*maxsend)
|
||||
{
|
||||
grow_send((int) (nsend_atoms+100)*NCUDAEXCHANGE,buf_pointer,0);
|
||||
Cuda_AtomVecChargeCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
|
||||
}
|
||||
|
||||
int nlocal=atom->nlocal-nsend_atoms;
|
||||
|
||||
for(int i=0;i<nsend_atoms;i++) copylist2[i]=1;
|
||||
for(int j=1;j<nsend_atoms+1;j++)
|
||||
{
|
||||
int i = static_cast <int> ((*buf_pointer)[j]);
|
||||
if(i>=nlocal) copylist2[i-nlocal]=-1;
|
||||
}
|
||||
|
||||
int actpos=0;
|
||||
for(int j=1;j<nsend_atoms+1;j++)
|
||||
{
|
||||
int i = static_cast <int> ((*buf_pointer)[j]);
|
||||
if(i<nlocal)
|
||||
{
|
||||
while(copylist2[actpos]==-1) actpos++;
|
||||
copylist[j-1]=nlocal+actpos;
|
||||
actpos++;
|
||||
}
|
||||
}
|
||||
cu_copylist->upload();
|
||||
|
||||
cuda->shared_data.atom.nlocal=nlocal;
|
||||
|
||||
int m = Cuda_AtomVecChargeCuda_PackExchange(&cuda->shared_data,nsend_atoms,*buf_pointer,cu_copylist->dev_data());
|
||||
|
||||
if (atom->nextra_grow)
|
||||
for(int j=0;j<nsend_atoms;j++)
|
||||
{
|
||||
int i=static_cast <int> ((*buf_pointer)[j+1]);
|
||||
int nextra=0;
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++) {
|
||||
|
||||
int dm = modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&((*buf_pointer)[m]));
|
||||
m+=dm;
|
||||
nextra+=dm;
|
||||
if(i<nlocal)modify->fix[atom->extra_grow[iextra]]->copy_arrays(copylist[j],i);
|
||||
if(m>*maxsend) grow_send(m,buf_pointer,1);
|
||||
}
|
||||
(*buf_pointer)[j+1] = nextra;
|
||||
}
|
||||
|
||||
(*buf_pointer)[0] = nsend_atoms;
|
||||
atom->nlocal-=nsend_atoms;
|
||||
cuda->shared_data.atom.update_nlocal=2;
|
||||
|
||||
if(m==1) return 0;//m is at least 1 in cuda since buf[0] contains number of atoms
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecChargeCuda::unpack_exchange(double *buf)
|
||||
{
|
||||
if(cuda->oncpu)
|
||||
return AtomVecCharge::unpack_exchange(buf);
|
||||
double *sublo,*subhi;
|
||||
|
||||
int dim=cuda->shared_data.exchange_dim;
|
||||
if(domain->box_change)
|
||||
Cuda_AtomVecChargeCuda_Init(&cuda->shared_data);
|
||||
if (domain->triclinic == 0) {
|
||||
sublo = domain->sublo;
|
||||
subhi = domain->subhi;
|
||||
} else {
|
||||
sublo = domain->sublo_lamda;
|
||||
subhi = domain->subhi_lamda;
|
||||
}
|
||||
|
||||
int mfirst=0;
|
||||
for(int pi=0;pi<(comm->procgrid[dim]>2?2:1);pi++)
|
||||
{
|
||||
int nlocal = atom->nlocal;
|
||||
int nsend_atoms=static_cast<int> (buf[0]);
|
||||
if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100);
|
||||
|
||||
if (nlocal+nsend_atoms+atom->nghost>=atom->nmax) grow_both(nlocal+nsend_atoms*2+atom->nghost);
|
||||
int naccept = Cuda_AtomVecChargeCuda_UnpackExchange(&cuda->shared_data,nsend_atoms,buf,cu_copylist->dev_data());
|
||||
cu_copylist->download();
|
||||
int m = nsend_atoms*NCUDAEXCHANGE + 1;
|
||||
nlocal+=naccept;
|
||||
if (atom->nextra_grow)
|
||||
for(int j=0;j<nsend_atoms;j++)
|
||||
{
|
||||
if(copylist[j]>-1)
|
||||
{
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
m += modify->fix[atom->extra_grow[iextra]]->
|
||||
unpack_exchange(copylist[j],&buf[m]);
|
||||
}
|
||||
else
|
||||
m+=static_cast <int> (buf[j+1]);
|
||||
}
|
||||
cuda->shared_data.atom.nlocal=nlocal;
|
||||
cuda->shared_data.atom.update_nlocal=2;
|
||||
atom->nlocal=nlocal;
|
||||
mfirst+=m;
|
||||
buf=&buf[m];
|
||||
}
|
||||
return mfirst;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef ATOM_CLASS
|
||||
|
||||
AtomStyle(charge/cuda,AtomVecChargeCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_ATOM_VEC_CHARGE_CUDA_H
|
||||
#define LMP_ATOM_VEC_CHARGE_CUDA_H
|
||||
|
||||
#include "atom_vec_charge.h"
|
||||
#include "cuda_data.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class AtomVecChargeCuda : public AtomVecCharge {
|
||||
public:
|
||||
AtomVecChargeCuda(class LAMMPS *, int, char **);
|
||||
virtual ~AtomVecChargeCuda() {}
|
||||
void grow_copylist(int n);
|
||||
void grow_send(int n,double** buf_send,int flag);
|
||||
void grow_both(int n);
|
||||
int pack_comm(int, int *, double *, int, int *);
|
||||
int pack_comm_vel(int, int *, double *, int, int *);
|
||||
void unpack_comm(int, int, double *);
|
||||
void unpack_comm_vel(int, int, double *);
|
||||
int pack_reverse(int, int, double *);
|
||||
void unpack_reverse(int, int *, double *);
|
||||
int pack_border(int, int *, double *, int, int *);
|
||||
int pack_border_vel(int, int *, double *, int, int *);
|
||||
void unpack_border(int, int, double *);
|
||||
void unpack_border_vel(int, int, double *);
|
||||
int pack_exchange(int, double *);
|
||||
int unpack_exchange(double *);
|
||||
private:
|
||||
class Cuda *cuda;
|
||||
bool cuda_init_done;
|
||||
int* copylist;
|
||||
int* copylist2;
|
||||
cCudaData<int, int, xx >* cu_copylist;
|
||||
int max_nsend;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,516 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include "atom_vec_full_cuda.h"
|
||||
#include "comm_cuda_cu.h"
|
||||
#include "atom_vec_full_cuda_cu.h"
|
||||
#include "atom.h"
|
||||
#include "domain.h"
|
||||
#include "modify.h"
|
||||
#include "fix.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
#include "universe.h"
|
||||
#include "comm.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define DELTA 10000
|
||||
#define BUFFACTOR 1.5
|
||||
#define BUFEXTRA 1000
|
||||
#define NCUDAEXCHANGE 13 //nextra x y z vx vy vz tag type mask image q molecule
|
||||
|
||||
#define BUF_FLOAT double
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Construct the full/cuda atom style; requires an active Cuda instance.
AtomVecFullCuda::AtomVecFullCuda(LAMMPS *lmp, int narg, char **arg) :
  AtomVecFull(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if(!cuda)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  // exchange bookkeeping starts empty; grow_copylist() allocates it later
  copylist=NULL;
  copylist2=NULL;
  cu_copylist=NULL;
  max_nsend=0;
  cuda_init_done=false;

  maxsend=0;
  cudable=true;
}
|
||||
|
||||
void AtomVecFullCuda::grow_copylist(int new_max_nsend)
|
||||
{
|
||||
max_nsend=new_max_nsend;
|
||||
delete cu_copylist;
|
||||
delete [] copylist2;
|
||||
if(copylist) CudaWrapper_FreePinnedHostData((void*) copylist);
|
||||
copylist = (int*) CudaWrapper_AllocPinnedHostData(max_nsend*sizeof(int),false);
|
||||
copylist2 = new int[max_nsend];
|
||||
cu_copylist = new cCudaData<int, int, xx > (copylist, max_nsend);
|
||||
}
|
||||
|
||||
void AtomVecFullCuda::grow_send(int n,double** buf_send,int flag) //need to be able to grow the comm send_buffer since the array sahll be copied from the gpu in whole
|
||||
{
|
||||
int old_maxsend=*maxsend+BUFEXTRA;
|
||||
*maxsend = static_cast<int> (BUFFACTOR * n);
|
||||
if (flag)
|
||||
{
|
||||
if(cuda->pinned)
|
||||
{
|
||||
double* tmp = new double[old_maxsend];
|
||||
memcpy((void*) tmp,(void*) *buf_send,old_maxsend*sizeof(double));
|
||||
if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
|
||||
*buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false);
|
||||
memcpy(*buf_send,tmp,old_maxsend*sizeof(double));
|
||||
delete [] tmp;
|
||||
}
|
||||
else
|
||||
{
|
||||
*buf_send = (double *)
|
||||
memory->srealloc(*buf_send,(*maxsend+BUFEXTRA)*sizeof(double),
|
||||
"comm:buf_send");
|
||||
}
|
||||
}
|
||||
else {
|
||||
if(cuda->pinned)
|
||||
{
|
||||
if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
|
||||
*buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false);
|
||||
}
|
||||
else
|
||||
{
|
||||
memory->sfree(*buf_send);
|
||||
*buf_send = (double *) memory->smalloc((*maxsend+BUFEXTRA)*sizeof(double),
|
||||
"comm:buf_send");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void AtomVecFullCuda::grow_both(int n)
|
||||
{
|
||||
if(cuda->finished_setup)
|
||||
cuda->downloadAll();
|
||||
AtomVecFull::grow(n);
|
||||
if(cuda->finished_setup)
|
||||
{
|
||||
cuda->checkResize();
|
||||
cuda->uploadAll();
|
||||
}
|
||||
}
|
||||
|
||||
int AtomVecFullCuda::pack_comm(int n, int* iswap, double *buf,
|
||||
int pbc_flag, int *pbc) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
return AtomVecFull::pack_comm(n,iswap,buf,pbc_flag,pbc);
|
||||
|
||||
int m = Cuda_CommCuda_PackComm(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
|
||||
if((sizeof(X_FLOAT)!=sizeof(double)) && m)
|
||||
m=(m+1)*sizeof(X_FLOAT)/sizeof(double);
|
||||
return m;
|
||||
}
|
||||
|
||||
int AtomVecFullCuda::pack_comm_vel(int n, int* iswap, double *buf,
|
||||
int pbc_flag, int *pbc) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
return AtomVecFull::pack_comm_vel(n,iswap,buf,pbc_flag,pbc);
|
||||
|
||||
int m = Cuda_CommCuda_PackCommVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
|
||||
if((sizeof(X_FLOAT)!=sizeof(double)) && m)
|
||||
m=(m+1)*sizeof(X_FLOAT)/sizeof(double);
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecFullCuda::unpack_comm(int n, int first, double *buf) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
{AtomVecFull::unpack_comm(n,first,buf); return;}
|
||||
|
||||
Cuda_CommCuda_UnpackComm(&cuda->shared_data,n,first,(void*)buf);
|
||||
}
|
||||
|
||||
void AtomVecFullCuda::unpack_comm_vel(int n, int first, double *buf) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
{AtomVecFull::unpack_comm_vel(n,first,buf); return;}
|
||||
|
||||
Cuda_CommCuda_UnpackCommVel(&cuda->shared_data,n,first,(void*)buf);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecFullCuda::pack_reverse(int n, int first, double *buf) //usually this should not be called since comm->communicate handles the communication if only forces are exchanged
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
return AtomVecFull::pack_reverse(n,first,buf);
|
||||
|
||||
int i,m,last;
|
||||
cuda->cu_f->download();
|
||||
m = 0;
|
||||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
buf[m++] = f[i][0];
|
||||
buf[m++] = f[i][1];
|
||||
buf[m++] = f[i][2];
|
||||
}
|
||||
cuda->cu_f->upload();
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecFullCuda::unpack_reverse(int n, int *list, double *buf)//usually this should not be called since comm->communicate handles the communication if only forces are exchanged
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
{AtomVecFull::unpack_reverse(n,list,buf); return;}
|
||||
|
||||
int i,j,m;
|
||||
|
||||
m = 0;
|
||||
cuda->cu_f->download();
|
||||
for (i = 0; i < n; i++) {
|
||||
j = list[i];
|
||||
f[j][0] += buf[m++];
|
||||
f[j][1] += buf[m++];
|
||||
f[j][2] += buf[m++];
|
||||
}
|
||||
cuda->cu_f->upload();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecFullCuda::pack_border(int n, int *iswap, double *buf,
|
||||
int pbc_flag, int *pbc)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
return AtomVecFull::pack_border(n,iswap,buf,pbc_flag,pbc);
|
||||
|
||||
int m = Cuda_AtomVecFullCuda_PackBorder(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
|
||||
return m;
|
||||
}
|
||||
|
||||
int AtomVecFullCuda::pack_border_vel(int n, int *iswap, double *buf,
|
||||
int pbc_flag, int *pbc)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
return AtomVecFull::pack_border_vel(n,iswap,buf,pbc_flag,pbc);
|
||||
|
||||
int m = Cuda_AtomVecFullCuda_PackBorderVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
|
||||
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecFullCuda::unpack_border(int n, int first, double *buf)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
{AtomVecFull::unpack_border(n,first,buf); return;}
|
||||
while(atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax) //ensure there is enough space on device to unpack data
|
||||
{
|
||||
grow_both(0);
|
||||
}
|
||||
int flag=Cuda_AtomVecFullCuda_UnpackBorder(&cuda->shared_data,n,first,(void*)buf);
|
||||
if(flag) {printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");}
|
||||
}
|
||||
|
||||
void AtomVecFullCuda::unpack_border_vel(int n, int first, double *buf)
|
||||
{
|
||||
if(not cuda->finished_setup || cuda->oncpu)
|
||||
{AtomVecFull::unpack_border_vel(n,first,buf); return;}
|
||||
while(atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax) //ensure there is enough space on device to unpack data
|
||||
{
|
||||
grow_both(0);
|
||||
}
|
||||
int flag=Cuda_AtomVecFullCuda_UnpackBorderVel(&cuda->shared_data,n,first,(void*)buf);
|
||||
if(flag) {printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack data for atom I for sending to another proc
|
||||
xyz must be 1st 3 values, so comm::exchange() can test on them
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
|
||||
int AtomVecFullCuda::pack_exchange(int dim, double *buf)
|
||||
{
|
||||
if(cuda->oncpu)
|
||||
return AtomVecFull::pack_exchange(dim,buf);
|
||||
|
||||
if(not cuda_init_done||domain->box_change)
|
||||
{
|
||||
Cuda_AtomVecFullCuda_Init(&cuda->shared_data);
|
||||
cuda_init_done=true;
|
||||
}
|
||||
double** buf_pointer=(double**) buf;
|
||||
if(*maxsend<atom->nghost || *buf_pointer==NULL)
|
||||
{
|
||||
grow_send(atom->nghost>*maxsend?atom->nghost:*maxsend,buf_pointer,0);
|
||||
*maxsend=atom->nghost>*maxsend?atom->nghost:*maxsend;
|
||||
}
|
||||
|
||||
if(max_nsend==0) grow_copylist(200);
|
||||
|
||||
int nsend_atoms = Cuda_AtomVecFullCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
|
||||
|
||||
if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100);
|
||||
if(nsend_atoms*NCUDAEXCHANGE>*maxsend)
|
||||
{
|
||||
grow_send((int) (nsend_atoms+100)*NCUDAEXCHANGE,buf_pointer,0);
|
||||
Cuda_AtomVecFullCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
|
||||
}
|
||||
|
||||
int nlocal=atom->nlocal-nsend_atoms;
|
||||
|
||||
for(int i=0;i<nsend_atoms;i++) copylist2[i]=1;
|
||||
for(int j=1;j<nsend_atoms+1;j++)
|
||||
{
|
||||
int i = static_cast <int> ((*buf_pointer)[j]);
|
||||
if(i>=nlocal) copylist2[i-nlocal]=-1;
|
||||
}
|
||||
|
||||
int actpos=0;
|
||||
for(int j=1;j<nsend_atoms+1;j++)
|
||||
{
|
||||
int i = static_cast <int> ((*buf_pointer)[j]);
|
||||
if(i<nlocal)
|
||||
{
|
||||
while(copylist2[actpos]==-1) actpos++;
|
||||
copylist[j-1]=nlocal+actpos;
|
||||
actpos++;
|
||||
}
|
||||
}
|
||||
cu_copylist->upload();
|
||||
|
||||
cuda->shared_data.atom.nlocal=nlocal;
|
||||
|
||||
int m = Cuda_AtomVecFullCuda_PackExchange(&cuda->shared_data,nsend_atoms,*buf_pointer,cu_copylist->dev_data());
|
||||
|
||||
timespec time1,time2;
|
||||
clock_gettime(CLOCK_REALTIME,&time1);
|
||||
|
||||
double* buf_p=*buf_pointer;
|
||||
for(int j=0;j<nsend_atoms;j++)
|
||||
{
|
||||
int i=static_cast <int> (buf_p[j+1]);
|
||||
int nextra=0;
|
||||
int k;
|
||||
buf_p[m++] = num_bond[i];
|
||||
for (k = 0; k < num_bond[i]; k++) {
|
||||
buf_p[m++] = bond_type[i][k];
|
||||
buf_p[m++] = bond_atom[i][k];
|
||||
}
|
||||
nextra+=2*num_bond[i]+1;
|
||||
if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
|
||||
|
||||
buf_p[m++] = num_angle[i];
|
||||
for (k = 0; k < num_angle[i]; k++) {
|
||||
buf_p[m++] = angle_type[i][k];
|
||||
buf_p[m++] = angle_atom1[i][k];
|
||||
buf_p[m++] = angle_atom2[i][k];
|
||||
buf_p[m++] = angle_atom3[i][k];
|
||||
}
|
||||
nextra+=4*num_angle[i]+1;
|
||||
if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
|
||||
|
||||
buf_p[m++] = num_dihedral[i];
|
||||
for (k = 0; k < num_dihedral[i]; k++) {
|
||||
buf_p[m++] = dihedral_type[i][k];
|
||||
buf_p[m++] = dihedral_atom1[i][k];
|
||||
buf_p[m++] = dihedral_atom2[i][k];
|
||||
buf_p[m++] = dihedral_atom3[i][k];
|
||||
buf_p[m++] = dihedral_atom4[i][k];
|
||||
}
|
||||
nextra+=5*num_dihedral[i]+1;
|
||||
if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
|
||||
|
||||
buf_p[m++] = num_improper[i];
|
||||
for (k = 0; k < num_improper[i]; k++) {
|
||||
buf_p[m++] = improper_type[i][k];
|
||||
buf_p[m++] = improper_atom1[i][k];
|
||||
buf_p[m++] = improper_atom2[i][k];
|
||||
buf_p[m++] = improper_atom3[i][k];
|
||||
buf_p[m++] = improper_atom4[i][k];
|
||||
}
|
||||
nextra+=5*num_improper[i]+1;
|
||||
if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
|
||||
|
||||
buf_p[m++] = nspecial[i][0];
|
||||
buf_p[m++] = nspecial[i][1];
|
||||
buf_p[m++] = nspecial[i][2];
|
||||
for (k = 0; k < nspecial[i][2]; k++) buf_p[m++] = special[i][k];
|
||||
nextra+=nspecial[i][2]+3;
|
||||
if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
|
||||
|
||||
if (atom->nextra_grow)
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
{
|
||||
int dm= modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf_p[m]);
|
||||
m+=dm;
|
||||
nextra+=dm;
|
||||
if(i<nlocal)modify->fix[atom->extra_grow[iextra]]->copy_arrays(copylist[j],i);
|
||||
if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
|
||||
}
|
||||
|
||||
if(i<nlocal)AtomVecFull::copy(copylist[j],i,1);
|
||||
(*buf_pointer)[j+1] = nextra;
|
||||
}
|
||||
|
||||
clock_gettime(CLOCK_REALTIME,&time2);
|
||||
cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+=
|
||||
time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
|
||||
|
||||
(*buf_pointer)[0] = nsend_atoms;
|
||||
atom->nlocal-=nsend_atoms;
|
||||
cuda->shared_data.atom.update_nlocal=2;
|
||||
//printf("End Pack Exchange\n");
|
||||
if(m==1) return 0;
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecFullCuda::unpack_exchange(double *buf)
|
||||
{
|
||||
// printf("Begin UnPack Exchange\n");
|
||||
if(cuda->oncpu)
|
||||
return AtomVecFull::unpack_exchange(buf);
|
||||
|
||||
double *sublo,*subhi;
|
||||
int dim=cuda->shared_data.exchange_dim;
|
||||
if(domain->box_change)
|
||||
Cuda_AtomVecFullCuda_Init(&cuda->shared_data);
|
||||
if (domain->triclinic == 0) {
|
||||
sublo = domain->sublo;
|
||||
subhi = domain->subhi;
|
||||
} else {
|
||||
sublo = domain->sublo_lamda;
|
||||
subhi = domain->subhi_lamda;
|
||||
}
|
||||
|
||||
int mfirst=0;
|
||||
for(int pi=0;pi<(comm->procgrid[dim]>2?2:1);pi++)
|
||||
{
|
||||
int nlocal = atom->nlocal;
|
||||
int nsend_atoms=static_cast<int> (buf[0]);
|
||||
if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100);
|
||||
|
||||
if (nlocal+nsend_atoms+atom->nghost>=atom->nmax) grow_both(nlocal+nsend_atoms*2+atom->nghost); //ensure there is enough space on device to unpack data
|
||||
int naccept = Cuda_AtomVecFullCuda_UnpackExchange(&cuda->shared_data,nsend_atoms,buf,cu_copylist->dev_data());
|
||||
cu_copylist->download();
|
||||
int m = nsend_atoms*NCUDAEXCHANGE + 1;
|
||||
nlocal+=naccept;
|
||||
|
||||
timespec time1,time2;
|
||||
clock_gettime(CLOCK_REALTIME,&time1);
|
||||
|
||||
for(int j=0;j<nsend_atoms;j++)
|
||||
{
|
||||
if(copylist[j]>-1)
|
||||
{
|
||||
int k;
|
||||
int i=copylist[j];
|
||||
num_bond[i] = static_cast<int> (buf[m++]);
|
||||
for (k = 0; k < num_bond[i]; k++) {
|
||||
bond_type[i][k] = static_cast<int> (buf[m++]);
|
||||
bond_atom[i][k] = static_cast<int> (buf[m++]);
|
||||
}
|
||||
|
||||
num_angle[i] = static_cast<int> (buf[m++]);
|
||||
for (k = 0; k < num_angle[i]; k++) {
|
||||
angle_type[i][k] = static_cast<int> (buf[m++]);
|
||||
angle_atom1[i][k] = static_cast<int> (buf[m++]);
|
||||
angle_atom2[i][k] = static_cast<int> (buf[m++]);
|
||||
angle_atom3[i][k] = static_cast<int> (buf[m++]);
|
||||
}
|
||||
|
||||
num_dihedral[i] = static_cast<int> (buf[m++]);
|
||||
for (k = 0; k < num_dihedral[i]; k++) {
|
||||
dihedral_type[i][k] = static_cast<int> (buf[m++]);
|
||||
dihedral_atom1[i][k] = static_cast<int> (buf[m++]);
|
||||
dihedral_atom2[i][k] = static_cast<int> (buf[m++]);
|
||||
dihedral_atom3[i][k] = static_cast<int> (buf[m++]);
|
||||
dihedral_atom4[i][k] = static_cast<int> (buf[m++]);
|
||||
}
|
||||
|
||||
num_improper[i] = static_cast<int> (buf[m++]);
|
||||
for (k = 0; k < num_improper[i]; k++) {
|
||||
improper_type[i][k] = static_cast<int> (buf[m++]);
|
||||
improper_atom1[i][k] = static_cast<int> (buf[m++]);
|
||||
improper_atom2[i][k] = static_cast<int> (buf[m++]);
|
||||
improper_atom3[i][k] = static_cast<int> (buf[m++]);
|
||||
improper_atom4[i][k] = static_cast<int> (buf[m++]);
|
||||
}
|
||||
|
||||
nspecial[i][0] = static_cast<int> (buf[m++]);
|
||||
nspecial[i][1] = static_cast<int> (buf[m++]);
|
||||
nspecial[i][2] = static_cast<int> (buf[m++]);
|
||||
for (k = 0; k < nspecial[i][2]; k++)
|
||||
special[i][k] = static_cast<int> (buf[m++]);
|
||||
|
||||
if (atom->nextra_grow)
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
m += modify->fix[atom->extra_grow[iextra]]->
|
||||
unpack_exchange(i,&buf[m]);
|
||||
|
||||
}
|
||||
else
|
||||
m+=static_cast <int> (buf[j+1]);
|
||||
}
|
||||
|
||||
clock_gettime(CLOCK_REALTIME,&time2);
|
||||
cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+=
|
||||
time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
|
||||
|
||||
cuda->shared_data.atom.nlocal=nlocal;
|
||||
cuda->shared_data.atom.update_nlocal=2;
|
||||
atom->nlocal=nlocal;
|
||||
mfirst+=m;
|
||||
buf=&buf[m];
|
||||
}
|
||||
return mfirst;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef ATOM_CLASS
|
||||
|
||||
AtomStyle(full/cuda,AtomVecFullCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_ATOM_VEC_FULL_CUDA_H
|
||||
#define LMP_ATOM_VEC_FULL_CUDA_H
|
||||
|
||||
#include "atom_vec_full.h"
|
||||
#include "cuda_data.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class AtomVecFullCuda : public AtomVecFull {
|
||||
public:
|
||||
AtomVecFullCuda(class LAMMPS *, int, char **);
|
||||
virtual ~AtomVecFullCuda() {}
|
||||
void grow_copylist(int n);
|
||||
void grow_send(int n,double** buf_send,int flag);
|
||||
void grow_both(int n);
|
||||
int pack_comm(int, int *, double *, int, int *);
|
||||
int pack_comm_vel(int, int *, double *, int, int *);
|
||||
void unpack_comm(int, int, double *);
|
||||
void unpack_comm_vel(int, int, double *);
|
||||
int pack_reverse(int, int, double *);
|
||||
void unpack_reverse(int, int *, double *);
|
||||
int pack_border(int, int *, double *, int, int *);
|
||||
int pack_border_vel(int, int *, double *, int, int *);
|
||||
void unpack_border(int, int, double *);
|
||||
void unpack_border_vel(int, int, double *);
|
||||
int pack_exchange(int, double *);
|
||||
int unpack_exchange(double *);
|
||||
private:
|
||||
class Cuda *cuda;
|
||||
bool cuda_init_done;
|
||||
int* copylist;
|
||||
int* copylist2;
|
||||
cCudaData<int, int, xx >* cu_copylist;
|
||||
int max_nsend;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -55,6 +55,8 @@ enum{SINGLE,MULTI};
|
|||
CommCuda::CommCuda(LAMMPS *lmp):Comm(lmp)
|
||||
{
|
||||
cuda = lmp->cuda;
|
||||
if(cuda == NULL)
|
||||
error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
|
||||
|
||||
cu_pbc=NULL;
|
||||
cu_slablo=NULL;
|
||||
|
|
|
@ -0,0 +1,483 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <stdio.h>
|
||||
#define MY_PREFIX comm_cuda
|
||||
#include "cuda_shared.h"
|
||||
#include "cuda_common.h"
|
||||
|
||||
#include "crm_cuda_utils.cu"
|
||||
|
||||
#include "comm_cuda_cu.h"
|
||||
#include "comm_cuda_kernel.cu"
|
||||
#include <ctime>
|
||||
|
||||
// Make sure sdata->buffer holds at least n*3 X_FLOAT values, reallocating
// it when it is too small, then copy the buffer pointer to the device
// symbol MY_CONST(buffer).
void Cuda_CommCuda_UpdateBuffer(cuda_shared_data* sdata,int n)
{
  const int needed=n*3*sizeof(X_FLOAT);

  if(needed>sdata->buffersize)
  {
    MYDBG(printf("Cuda_ComputeTempCuda Resizing Buffer at %p with %i kB to\n",sdata->buffer,sdata->buffersize);)
    CudaWrapper_FreeCudaData(sdata->buffer,sdata->buffersize);
    sdata->buffer = CudaWrapper_AllocCudaData(needed);
    sdata->buffersize=needed;
    sdata->buffer_new++;   // counts reallocations of the buffer
    MYDBG(printf("New buffer at %p with %i kB\n",sdata->buffer,sdata->buffersize);)
  }

  cudaMemcpyToSymbol(MY_CONST(buffer), & sdata->buffer, sizeof(int*) );
}
|
||||
|
||||
|
||||
// Refresh the device symbols that mirror the atom counts and the device
// pointers of the per-atom arrays used by the comm kernels.
void Cuda_CommCuda_UpdateNmax(cuda_shared_data* sdata)
{
  // atom counts
  cudaMemcpyToSymbol(MY_CONST(nlocal), & sdata->atom.nlocal, sizeof(int) );
  cudaMemcpyToSymbol(MY_CONST(nmax),   & sdata->atom.nmax,   sizeof(int) );

  // device addresses of the per-atom arrays
  cudaMemcpyToSymbol(MY_CONST(x),    & sdata->atom.x.dev_data,    sizeof(X_FLOAT*) );
  cudaMemcpyToSymbol(MY_CONST(v),    & sdata->atom.v.dev_data,    sizeof(X_FLOAT*) );
  cudaMemcpyToSymbol(MY_CONST(f),    & sdata->atom.f.dev_data,    sizeof(F_FLOAT*) );
  cudaMemcpyToSymbol(MY_CONST(type), & sdata->atom.type.dev_data, sizeof(int*) );
}
|
||||
|
||||
|
||||
// Initialise the device symbols used by the comm kernels: the atom
// counts/pointers (via Cuda_CommCuda_UpdateNmax), the type count plus one,
// the box lengths and the flag/debug pointers.
void Cuda_CommCuda_Init(cuda_shared_data* sdata)
{
  Cuda_CommCuda_UpdateNmax(sdata);

  const int ntypes_plus_one=sdata->atom.ntypes+1;
  cudaMemcpyToSymbol(MY_CONST(cuda_ntypes), &ntypes_plus_one, sizeof(int));
  cudaMemcpyToSymbol(MY_CONST(prd), sdata->domain.prd, 3*sizeof(X_FLOAT));
  cudaMemcpyToSymbol(MY_CONST(flag), &sdata->flag, sizeof(int*));
  cudaMemcpyToSymbol(MY_CONST(debugdata), &sdata->debugdata, sizeof(int*));
}
|
||||
|
||||
// Pack the positions of the n atoms in send list iswap on the device.
//   buf_send = host destination; written only when overlap_comm is off
//   pbc      = periodic image flags, applied when pbc_flag != 0
//   returns  = 3*n, whether or not the kernel was launched
// Also accumulates the kernel and download times in sdata->cuda_timings.
int Cuda_CommCuda_PackComm(cuda_shared_data* sdata,int n,int iswap,void* buf_send,int* pbc,int pbc_flag)
{
  timespec time1,time2;

  // refresh device symbols the host marked as stale
  if(sdata->atom.update_nmax) Cuda_CommCuda_UpdateNmax(sdata);
  if(sdata->atom.update_nlocal)
    cudaMemcpyToSymbol(MY_CONST(nlocal), & sdata->atom.nlocal, sizeof(int) );

  const int size=n*3*sizeof(X_FLOAT);
  if(sdata->buffer_new or (size>sdata->buffersize))
    Cuda_CommCuda_UpdateBuffer(sdata,n);

  // shift applied to the packed positions; stays zero without pbc_flag
  X_FLOAT dx=0.0;
  X_FLOAT dy=0.0;
  X_FLOAT dz=0.0;
  if (pbc_flag != 0) {
    const bool orthogonal = (sdata->domain.triclinic == 0);
    if (orthogonal) {
      dx = pbc[0]*sdata->domain.prd[0];
      dy = pbc[1]*sdata->domain.prd[1];
    } else {
      // triclinic boxes add the tilt-factor terms
      dx = pbc[0]*sdata->domain.prd[0] + pbc[5]*sdata->domain.xy + pbc[4]*sdata->domain.xz;
      dy = pbc[1]*sdata->domain.prd[1] + pbc[3]*sdata->domain.yz;
    }
    dz = pbc[2]*sdata->domain.prd[2];
  }

  int3 layout=getgrid(n);
  dim3 threads(layout.z, 1, 1);
  dim3 grid(layout.x, layout.y, 1);

  // nothing is launched when there are no local atoms
  if(sdata->atom.nlocal<=0) return 3*n;

  cudaMemset( sdata->flag,0,sizeof(int));

  clock_gettime(CLOCK_REALTIME,&time1);

  // with overlap_comm the kernel writes into the per-swap device buffer,
  // otherwise into the shared staging buffer
  void* buf=sdata->overlap_comm?sdata->comm.buf_send_dev[iswap]:sdata->buffer;
  Cuda_CommCuda_PackComm_Kernel<<<grid, threads,0>>>((int*) sdata->comm.sendlist.dev_data,n
      ,sdata->comm.maxlistlength,iswap,dx,dy,dz,buf);
  cudaThreadSynchronize();

  clock_gettime(CLOCK_REALTIME,&time2);
  sdata->cuda_timings.comm_forward_kernel_pack+=
    time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;

  CUT_CHECK_ERROR("Cuda_CommCuda_PackComm: Kernel execution failed");

  if(not sdata->overlap_comm)
    cudaMemcpy(buf_send, sdata->buffer, n*3*sizeof(X_FLOAT), cudaMemcpyDeviceToHost);

  clock_gettime(CLOCK_REALTIME,&time1);
  sdata->cuda_timings.comm_forward_download+=
    time1.tv_sec-time2.tv_sec+1.0*(time1.tv_nsec-time2.tv_nsec)/1000000000;

  // read back the device flag and report a nonzero value
  int aflag;
  cudaMemcpy(&aflag, sdata->flag, sizeof(int), cudaMemcpyDeviceToHost);
  if(aflag!=0) printf("aflag PackComm: %i\n",aflag);
  CUT_CHECK_ERROR("Cuda_CommCuda_PackComm: Kernel execution failed");

  return 3*n;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
   Forward-communication pack for swap `iswap`, sized for 6 X_FLOAT values
   per atom.

   sdata     shared host/device state (buffers, send lists, timings)
   n         number of atoms in the send list
   iswap     swap index; selects the send list and, when overlap_comm is
             set, the per-swap device send buffer
   buf_send  host buffer that receives n*6 X_FLOAT values (only written
             when overlap_comm is not set)
   pbc       periodic image flags used to build the shift (dx,dy,dz)
   pbc_flag  0 means no periodic shift is applied

   Returns 6*n, also when nlocal <= 0 and nothing was launched.

   NOTE(review): the kernel launched below is Cuda_CommCuda_PackComm_Kernel,
   the same kernel the 3*n pack routine uses. Confirm that it really fills
   all 6 values per atom that are copied back to buf_send here.
------------------------------------------------------------------------- */
int Cuda_CommCuda_PackCommVel(cuda_shared_data* sdata,int n,int iswap,void* buf_send,int* pbc,int pbc_flag)
{
  timespec time1,time2;

  // Refresh device-side state when the host flagged a change.
  if(sdata->atom.update_nmax)
    Cuda_CommCuda_UpdateNmax(sdata);
  if(sdata->atom.update_nlocal)
    cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );

  // Make sure the shared staging buffer can hold 6 values per atom.
  int size=n*6*sizeof(X_FLOAT);
  if(sdata->buffer_new or (size>sdata->buffersize))
    Cuda_CommCuda_UpdateBuffer(sdata,n);

  // Periodic shift added to the packed coordinates.
  X_FLOAT dx=0.0;
  X_FLOAT dy=0.0;
  X_FLOAT dz=0.0;
  if (pbc_flag != 0) {
    if (sdata->domain.triclinic == 0) {
      dx = pbc[0]*sdata->domain.prd[0];
      dy = pbc[1]*sdata->domain.prd[1];
      dz = pbc[2]*sdata->domain.prd[2];
    } else {
      // Triclinic boxes add the tilt-factor contributions.
      dx = pbc[0]*sdata->domain.prd[0] + pbc[5]*sdata->domain.xy + pbc[4]*sdata->domain.xz;
      dy = pbc[1]*sdata->domain.prd[1] + pbc[3]*sdata->domain.yz;
      dz = pbc[2]*sdata->domain.prd[2];
    }
  }

  int3 layout=getgrid(n);
  dim3 threads(layout.z, 1, 1);
  dim3 grid(layout.x, layout.y, 1);

  if(sdata->atom.nlocal>0)
  {
    // Clear the device flag that is read back and reported below.
    cudaMemset( sdata->flag,0,sizeof(int));

    clock_gettime(CLOCK_REALTIME,&time1);

    void* buf=sdata->overlap_comm?sdata->comm.buf_send_dev[iswap]:sdata->buffer;
    Cuda_CommCuda_PackComm_Kernel<<<grid, threads,0>>>((int*) sdata->comm.sendlist.dev_data,n
                                                      ,sdata->comm.maxlistlength,iswap,dx,dy,dz,buf);
    cudaThreadSynchronize();

    clock_gettime(CLOCK_REALTIME,&time2);
    sdata->cuda_timings.comm_forward_kernel_pack+=
      time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;

    CUT_CHECK_ERROR("Cuda_CommCuda_PackCommVel: Kernel execution failed");

    // Without overlapped communication the packed data goes to the host.
    if(not sdata->overlap_comm)
      cudaMemcpy(buf_send, sdata->buffer, n*6*sizeof(X_FLOAT), cudaMemcpyDeviceToHost);

    clock_gettime(CLOCK_REALTIME,&time1);
    sdata->cuda_timings.comm_forward_download+=
      time1.tv_sec-time2.tv_sec+1.0*(time1.tv_nsec-time2.tv_nsec)/1000000000;

    int aflag;
    cudaMemcpy(&aflag, sdata->flag, sizeof(int), cudaMemcpyDeviceToHost);
    if(aflag!=0) printf("aflag PackCommVel: %i\n",aflag);
    CUT_CHECK_ERROR("Cuda_CommCuda_PackCommVel: Kernel execution failed");
  }
  return 6*n;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
   Forward communication of a swap with this process itself: launches
   Cuda_CommCuda_PackComm_Self_Kernel for the atoms of send list `iswap`.
   No host buffer is involved in this routine.

   sdata     shared host/device state (send lists, timings)
   n         number of atoms in the send list
   iswap     swap index selecting the send list
   first     passed through to the kernel (presumably the index of the
             first destination atom -- verify against the kernel)
   pbc       periodic image flags used to build the shift (dx,dy,dz)
   pbc_flag  0 means no periodic shift is applied

   Returns 3*n, also when nlocal <= 0 and nothing was launched.
------------------------------------------------------------------------- */
int Cuda_CommCuda_PackComm_Self(cuda_shared_data* sdata,int n,int iswap,int first,int* pbc,int pbc_flag)
{
  MYDBG(printf(" # CUDA: CommCuda_PackComm_Self\n");)
  timespec time1,time2;

  // Refresh device-side state when the host flagged a change.
  if(sdata->atom.update_nmax)
    Cuda_CommCuda_UpdateNmax(sdata);
  if(sdata->atom.update_nlocal)
    cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );

  int size=n*3*sizeof(X_FLOAT);
  if(sdata->buffer_new or (size>sdata->buffersize))
    Cuda_CommCuda_UpdateBuffer(sdata,n);

  // Periodic shift added to the copied coordinates.
  X_FLOAT dx=0.0;
  X_FLOAT dy=0.0;
  X_FLOAT dz=0.0;
  if (pbc_flag != 0) {
    if (sdata->domain.triclinic == 0) {
      dx = pbc[0]*sdata->domain.prd[0];
      dy = pbc[1]*sdata->domain.prd[1];
      dz = pbc[2]*sdata->domain.prd[2];
    } else {
      // Triclinic boxes add the tilt-factor contributions.
      dx = pbc[0]*sdata->domain.prd[0] + pbc[5]*sdata->domain.xy + pbc[4]*sdata->domain.xz;
      dy = pbc[1]*sdata->domain.prd[1] + pbc[3]*sdata->domain.yz;
      dz = pbc[2]*sdata->domain.prd[2];
    }
  }

  int3 layout=getgrid(n);
  dim3 threads(layout.z, 1, 1);
  dim3 grid(layout.x, layout.y, 1);

  if(sdata->atom.nlocal>0)
  {
    clock_gettime(CLOCK_REALTIME,&time1);

    Cuda_CommCuda_PackComm_Self_Kernel<<<grid, threads,0>>>((int*) sdata->comm.sendlist.dev_data,n,sdata->comm.maxlistlength,iswap,dx,dy,dz,first);
    cudaThreadSynchronize();

    clock_gettime(CLOCK_REALTIME,&time2);
    sdata->cuda_timings.comm_forward_kernel_self+=
      time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;

    CUT_CHECK_ERROR("Cuda_CommCuda_PackComm_Self: Kernel execution failed");
  }

  return 3*n;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
   Forward communication (sized for 6 X_FLOAT values per atom) of a swap
   with this process itself. No host buffer is involved in this routine.

   sdata     shared host/device state (send lists, timings)
   n         number of atoms in the send list
   iswap     swap index selecting the send list
   first     passed through to the kernel (presumably the index of the
             first destination atom -- verify against the kernel)
   pbc       periodic image flags used to build the shift (dx,dy,dz)
   pbc_flag  0 means no periodic shift is applied

   Returns 6*n, also when nlocal <= 0 and nothing was launched.

   NOTE(review): the kernel launched is Cuda_CommCuda_PackComm_Self_Kernel,
   the same one Cuda_CommCuda_PackComm_Self uses for its 3*n payload.
   Confirm that it also handles the velocity part implied by the 6*n
   return value.
------------------------------------------------------------------------- */
int Cuda_CommCuda_PackCommVel_Self(cuda_shared_data* sdata,int n,int iswap,int first,int* pbc,int pbc_flag)
{
  MYDBG(printf(" # CUDA: CommCuda_PackCommVel_Self\n");)
  timespec time1,time2;

  // Refresh device-side state when the host flagged a change.
  if(sdata->atom.update_nmax)
    Cuda_CommCuda_UpdateNmax(sdata);
  if(sdata->atom.update_nlocal)
    cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );

  int size=n*6*sizeof(X_FLOAT);
  if(sdata->buffer_new or (size>sdata->buffersize))
    Cuda_CommCuda_UpdateBuffer(sdata,n);

  // Periodic shift added to the copied coordinates.
  X_FLOAT dx=0.0;
  X_FLOAT dy=0.0;
  X_FLOAT dz=0.0;
  if (pbc_flag != 0) {
    if (sdata->domain.triclinic == 0) {
      dx = pbc[0]*sdata->domain.prd[0];
      dy = pbc[1]*sdata->domain.prd[1];
      dz = pbc[2]*sdata->domain.prd[2];
    } else {
      // Triclinic boxes add the tilt-factor contributions.
      dx = pbc[0]*sdata->domain.prd[0] + pbc[5]*sdata->domain.xy + pbc[4]*sdata->domain.xz;
      dy = pbc[1]*sdata->domain.prd[1] + pbc[3]*sdata->domain.yz;
      dz = pbc[2]*sdata->domain.prd[2];
    }
  }

  int3 layout=getgrid(n);
  dim3 threads(layout.z, 1, 1);
  dim3 grid(layout.x, layout.y, 1);

  if(sdata->atom.nlocal>0)
  {
    clock_gettime(CLOCK_REALTIME,&time1);

    Cuda_CommCuda_PackComm_Self_Kernel<<<grid, threads,0>>>((int*) sdata->comm.sendlist.dev_data,n,sdata->comm.maxlistlength,iswap,dx,dy,dz,first);
    cudaThreadSynchronize();

    clock_gettime(CLOCK_REALTIME,&time2);
    sdata->cuda_timings.comm_forward_kernel_self+=
      time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;

    CUT_CHECK_ERROR("Cuda_CommCuda_PackCommVel_Self: Kernel execution failed");
  }

  return 6*n;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
   Forward-communication unpack, 3 X_FLOAT values per atom.

   When overlap_comm is off, or iswap is negative, the host data in
   buf_recv is first uploaded into sdata->buffer and the kernel reads from
   there; otherwise the kernel reads the per-swap device receive buffer.

   sdata     shared host/device state (buffers, timings)
   n         number of atoms to unpack
   first     passed through to the unpack kernel
   buf_recv  host receive buffer (only read on the upload path)
   iswap     swap index; a negative value forces the upload path
------------------------------------------------------------------------- */
void Cuda_CommCuda_UnpackComm(cuda_shared_data* sdata,int n,int first,void* buf_recv,int iswap)
{
  timespec t_begin,t_uploaded,t_done;

  if(sdata->atom.update_nmax)
    Cuda_CommCuda_UpdateNmax(sdata);
  if(sdata->atom.update_nlocal)
    cudaMemcpyToSymbol(MY_CONST(nlocal), &sdata->atom.nlocal, sizeof(int));

  int nbytes=n*3*sizeof(X_FLOAT);
  if(sdata->buffer_new or (nbytes>sdata->buffersize))
    Cuda_CommCuda_UpdateBuffer(sdata,n);

  int3 layout=getgrid(n);
  dim3 threads(layout.z, 1, 1);
  dim3 grid(layout.x, layout.y, 1);

  // Nothing to unpack into when there are no local atoms.
  if(sdata->atom.nlocal<=0) return;

  // True when the data has to be staged through sdata->buffer.
  const bool via_staging = !(sdata->overlap_comm && iswap>=0);

  clock_gettime(CLOCK_REALTIME,&t_begin);
  if(via_staging)
    cudaMemcpy(sdata->buffer,(void*)buf_recv, n*3*sizeof(X_FLOAT), cudaMemcpyHostToDevice);
  clock_gettime(CLOCK_REALTIME,&t_uploaded);
  sdata->cuda_timings.comm_forward_upload+=
    t_uploaded.tv_sec-t_begin.tv_sec+1.0*(t_uploaded.tv_nsec-t_begin.tv_nsec)/1000000000;

  void* buf;
  if(via_staging) buf=sdata->buffer;
  else            buf=sdata->comm.buf_recv_dev[iswap];

  Cuda_CommCuda_UnpackComm_Kernel<<<grid, threads,0>>>(n,first,buf);
  cudaThreadSynchronize();

  clock_gettime(CLOCK_REALTIME,&t_done);
  sdata->cuda_timings.comm_forward_kernel_unpack+=
    t_done.tv_sec-t_uploaded.tv_sec+1.0*(t_done.tv_nsec-t_uploaded.tv_nsec)/1000000000;

  CUT_CHECK_ERROR("Cuda_CommCuda_UnpackComm: Kernel execution failed");
}
|
||||
|
||||
/* ----------------------------------------------------------------------
   Forward-communication unpack, sized for 6 X_FLOAT values per atom.

   When overlap_comm is off, or iswap is negative, the host data in
   buf_recv is first uploaded into sdata->buffer and the kernel reads from
   there; otherwise the kernel reads the per-swap device receive buffer.

   sdata     shared host/device state (buffers, timings)
   n         number of atoms to unpack
   first     passed through to the unpack kernel
   buf_recv  host receive buffer (only read on the upload path)
   iswap     swap index; a negative value forces the upload path

   NOTE(review): the kernel launched is Cuda_CommCuda_UnpackComm_Kernel,
   the same one Cuda_CommCuda_UnpackComm uses for 3 values per atom.
   Confirm that it consumes the velocity part uploaded here.
------------------------------------------------------------------------- */
void Cuda_CommCuda_UnpackCommVel(cuda_shared_data* sdata,int n,int first,void* buf_recv,int iswap)
{
  timespec time1,time2;

  // Refresh device-side state when the host flagged a change.
  if(sdata->atom.update_nmax)
    Cuda_CommCuda_UpdateNmax(sdata);
  if(sdata->atom.update_nlocal)
    cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );

  int size=n*6*sizeof(X_FLOAT);
  if(sdata->buffer_new or (size>sdata->buffersize))
    Cuda_CommCuda_UpdateBuffer(sdata,n);

  int3 layout=getgrid(n);
  dim3 threads(layout.z, 1, 1);
  dim3 grid(layout.x, layout.y, 1);

  if(sdata->atom.nlocal>0)
  {
    clock_gettime(CLOCK_REALTIME,&time1);

    // Stage the host data on the device unless it is already there.
    if(not sdata->overlap_comm||iswap<0)
      cudaMemcpy(sdata->buffer,(void*)buf_recv, n*6*sizeof(X_FLOAT), cudaMemcpyHostToDevice);

    clock_gettime(CLOCK_REALTIME,&time2);
    sdata->cuda_timings.comm_forward_upload+=
      time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;

    void* buf=(sdata->overlap_comm&&iswap>=0)?sdata->comm.buf_recv_dev[iswap]:sdata->buffer;
    Cuda_CommCuda_UnpackComm_Kernel<<<grid, threads,0>>>(n,first,buf);
    cudaThreadSynchronize();

    clock_gettime(CLOCK_REALTIME,&time1);
    sdata->cuda_timings.comm_forward_kernel_unpack+=
      time1.tv_sec-time2.tv_sec+1.0*(time1.tv_nsec-time2.tv_nsec)/1000000000;

    CUT_CHECK_ERROR("Cuda_CommCuda_UnpackCommVel: Kernel execution failed");
  }
}
|
||||
|
||||
/* ----------------------------------------------------------------------
   Reverse-communication pack: copies n F_FLOAT values for each of three
   components from the device force array to the host buffer buf_send.

   The device array is read at offsets first, first+nmax and first+2*nmax;
   the host buffer is filled as three consecutive runs of n values.

   sdata     shared host/device state
   n         number of atoms to pack
   first     index of the first atom to pack
   buf_send  host buffer receiving 3*n F_FLOAT values

   Returns n*3.
------------------------------------------------------------------------- */
int Cuda_CommCuda_PackReverse(cuda_shared_data* sdata,int n,int first,void* buf_send)
{
  if(sdata->atom.update_nmax)
    Cuda_CommCuda_UpdateNmax(sdata);
  if(sdata->atom.update_nlocal)
    cudaMemcpyToSymbol(MY_CONST(nlocal), &sdata->atom.nlocal, sizeof(int));

  int nbytes=n*3*sizeof(F_FLOAT);
  if(sdata->buffer_new or (nbytes>sdata->buffersize))
    Cuda_CommCuda_UpdateBuffer(sdata,n);

  F_FLOAT* host_dst=(F_FLOAT*)buf_send;
  F_FLOAT* dev_src=(F_FLOAT*)sdata->atom.f.dev_data;
  dev_src+=first;

  // One device-to-host copy per component.
  for(int comp=0; comp<3; comp++)
  {
    if(comp>0)
    {
      host_dst+=n;
      dev_src+=sdata->atom.nmax;
    }
    cudaMemcpy(host_dst, dev_src, n*sizeof(F_FLOAT), cudaMemcpyDeviceToHost);
  }

  return n*3;
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------
   Reverse-communication unpack: uploads 3*n F_FLOAT values from the host
   buffer buf_recv into sdata->buffer and launches the unpack kernel for
   the atoms of send list `iswap`.

   sdata     shared host/device state (buffers, send lists)
   n         number of atoms in the send list
   iswap     swap index selecting the send list
   buf_recv  host buffer holding the received values
------------------------------------------------------------------------- */
void Cuda_CommCuda_UnpackReverse(cuda_shared_data* sdata,int n,int iswap,void* buf_recv)
{
  if(sdata->atom.update_nmax)
    Cuda_CommCuda_UpdateNmax(sdata);
  if(sdata->atom.update_nlocal)
    cudaMemcpyToSymbol(MY_CONST(nlocal), &sdata->atom.nlocal, sizeof(int));

  int nbytes=n*3*sizeof(F_FLOAT);
  if(sdata->buffer_new or (nbytes>sdata->buffersize))
    Cuda_CommCuda_UpdateBuffer(sdata,n);

  int3 layout=getgrid(n);
  dim3 threads(layout.z, 1, 1);
  dim3 grid(layout.x, layout.y, 1);

  // Without local atoms there is nothing to unpack.
  if(sdata->atom.nlocal<=0) return;

  cudaMemcpy(sdata->buffer,buf_recv, nbytes, cudaMemcpyHostToDevice);
  Cuda_CommCuda_UnpackReverse_Kernel<<<grid, threads,0>>>(
      (int*) sdata->comm.sendlist.dev_data,
      n,
      sdata->comm.maxlistlength,
      iswap);
  cudaThreadSynchronize();
  CUT_CHECK_ERROR("Cuda_CommCuda_UnpackReverse: Kernel execution failed");
}
|
||||
|
||||
/* ----------------------------------------------------------------------
   Reverse communication of a swap with this process itself: launches
   Cuda_CommCuda_UnpackReverse_Self_Kernel for the atoms of send list
   `iswap`. No host buffer is involved in this routine.

   sdata  shared host/device state (send lists)
   n      number of atoms in the send list
   iswap  swap index selecting the send list
   first  passed through to the kernel (presumably the index of the first
          source atom -- verify against the kernel)

   NOTE(review): the buffer size check uses sizeof(X_FLOAT) while
   Cuda_CommCuda_UnpackReverse uses sizeof(F_FLOAT). Confirm which is
   intended; the buffer is not passed to the kernel here.
------------------------------------------------------------------------- */
void Cuda_CommCuda_UnpackReverse_Self(cuda_shared_data* sdata,int n,int iswap,int first)
{
  // Refresh device-side state when the host flagged a change.
  if(sdata->atom.update_nmax)
    Cuda_CommCuda_UpdateNmax(sdata);
  if(sdata->atom.update_nlocal)
    cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );

  int size=n*3*sizeof(X_FLOAT);
  if(sdata->buffer_new or (size>sdata->buffersize))
    Cuda_CommCuda_UpdateBuffer(sdata,n);

  int3 layout=getgrid(n);
  dim3 threads(layout.z, 1, 1);
  dim3 grid(layout.x, layout.y, 1);

  if(sdata->atom.nlocal>0)
  {
    Cuda_CommCuda_UnpackReverse_Self_Kernel<<<grid, threads,0>>>((int*) sdata->comm.sendlist.dev_data,n,sdata->comm.maxlistlength,iswap,first);
    cudaThreadSynchronize();
    CUT_CHECK_ERROR("Cuda_CommCuda_UnpackReverse_Self: Kernel execution failed");
  }
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------
   Builds the device send list for swap `iswap` and returns the number of
   entries, which the kernel leaves in the first int of sdata->buffer.

   style == 1 launches the "Single" kernel with the slablo/slabhi bounds;
   any other value launches the "Multi" kernel with multilo/multihi.

   sdata        shared host/device state (buffers, send lists, timings)
   bordergroup  passed to the kernel; also selects how the atom range
                size is computed
   ineed        passed to the kernel; values >= 2 use the nfirst..nlast
                range size
   style        1 = single, otherwise multi
   atom_nfirst  passed to the kernel; lower bound of the range size in the
                bordergroup case
   nfirst,nlast inclusive atom index range
   dim          passed to the kernel
   iswap        swap index
------------------------------------------------------------------------- */
int Cuda_CommCuda_BuildSendlist(cuda_shared_data* sdata,int bordergroup,int ineed,int style,int atom_nfirst,int nfirst,int nlast,int dim,int iswap)
{
  MYDBG(printf(" # CUDA: CommCuda_BuildSendlist\n");)
  timespec t_launch,t_finish;

  // Unlike the pack/unpack routines, device state is refreshed unconditionally.
  Cuda_CommCuda_UpdateNmax(sdata);
  cudaMemcpyToSymbol(MY_CONST(nlocal), &sdata->atom.nlocal, sizeof(int));
  if(sdata->buffer_new or (80>sdata->buffersize))
    Cuda_CommCuda_UpdateBuffer(sdata,10);

  // Number of atoms the kernel has to look at.
  int n;
  if (bordergroup && ineed < 2)
  {
    const int ghost_span=nlast-sdata->atom.nlocal+1;
    n=(ghost_span>atom_nfirst)?ghost_span:atom_nfirst;
  }
  else
  {
    n=nlast-nfirst+1;
  }

  int3 layout=getgrid(n,0,512,true);
  dim3 threads(layout.z, 1, 1);
  dim3 grid(layout.x+1, layout.y, 1);

  // The first int of the buffer is the list-length counter; start at zero.
  cudaMemset((int*) (sdata->buffer),0,sizeof(int));

  clock_gettime(CLOCK_REALTIME,&t_launch);
  if(style==1)
  {
    Cuda_CommCuda_BuildSendlist_Single<<<grid, threads,(threads.x+1)*sizeof(int)>>>(
        bordergroup, ineed, atom_nfirst, nfirst, nlast, dim, iswap,
        (X_FLOAT*) sdata->comm.slablo.dev_data,
        (X_FLOAT*) sdata->comm.slabhi.dev_data,
        (int*) sdata->comm.sendlist.dev_data,
        sdata->comm.maxlistlength);
  }
  else
  {
    Cuda_CommCuda_BuildSendlist_Multi<<<grid, threads,(threads.x+1)*sizeof(int)>>>(
        bordergroup, ineed, atom_nfirst, nfirst, nlast, dim, iswap,
        (X_FLOAT*) sdata->comm.multilo.dev_data,
        (X_FLOAT*) sdata->comm.multihi.dev_data,
        (int*) sdata->comm.sendlist.dev_data,
        sdata->comm.maxlistlength);
  }
  cudaThreadSynchronize();
  clock_gettime(CLOCK_REALTIME,&t_finish);
  sdata->cuda_timings.comm_border_kernel_buildlist+=
    t_finish.tv_sec-t_launch.tv_sec+1.0*(t_finish.tv_nsec-t_launch.tv_nsec)/1000000000;

  CUT_CHECK_ERROR("Cuda_CommCuda_BuildSendlist: Kernel execution failed");

  // Read the resulting list length back from the device.
  int nsend;
  cudaMemcpy(&nsend, sdata->buffer, sizeof(int), cudaMemcpyDeviceToHost);
  return nsend;
}
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "mpi.h"
|
||||
#include <cstring>
|
||||
#include "compute_pe_cuda.h"
|
||||
#include "atom.h"
|
||||
#include "update.h"
|
||||
#include "force.h"
|
||||
#include "pair.h"
|
||||
#include "bond.h"
|
||||
#include "angle.h"
|
||||
#include "dihedral.h"
|
||||
#include "improper.h"
|
||||
#include "kspace.h"
|
||||
#include "modify.h"
|
||||
#include "domain.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
   Forwards all arguments to the ComputePE constructor and sets the
   cudable flag to 1.
------------------------------------------------------------------------- */
ComputePECuda::ComputePECuda(LAMMPS *lmp, int narg, char **arg)
  : ComputePE(lmp, narg, arg)
{
  cudable = 1;
}
|
|
@ -0,0 +1,59 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef COMPUTE_CLASS
|
||||
|
||||
ComputeStyle(pe/cuda,ComputePECuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_COMPUTE_PE_CUDA_H
|
||||
#define LMP_COMPUTE_PE_CUDA_H
|
||||
|
||||
#include "compute_pe.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// Compute style registered as "pe/cuda". Derives from ComputePE and adds
// only its own constructor (defined out of line) and an empty destructor.
class ComputePECuda : public ComputePE
{
 public:
  ComputePECuda(class LAMMPS *, int, char **);

  ~ComputePECuda() {}
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,97 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "mpi.h"
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include "compute_pressure_cuda.h"
|
||||
#include "atom.h"
|
||||
#include "update.h"
|
||||
#include "domain.h"
|
||||
#include "modify.h"
|
||||
#include "fix.h"
|
||||
#include "force.h"
|
||||
#include "pair.h"
|
||||
#include "bond.h"
|
||||
#include "angle.h"
|
||||
#include "dihedral.h"
|
||||
#include "improper.h"
|
||||
#include "kspace.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
enum{DUMMY0,INVOKED_SCALAR,INVOKED_VECTOR,DUMMMY3,INVOKED_PERATOM};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
   Forwards all arguments to the ComputePressure constructor, requires an
   active Cuda object, and marks this compute as cudable only if the
   temperature compute named by arg[3] is cudable as well.
------------------------------------------------------------------------- */
ComputePressureCuda::ComputePressureCuda(LAMMPS *lmp, int narg, char **arg) :
  ComputePressure(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if(cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  cudable = 1;

  // look up the temperature compute referenced by the pressure command
  // assumes the base-class constructor already rejected an unknown ID, so
  // icompute is a valid index here -- TODO confirm

  int n = strlen(arg[3]) + 1;
  char* id_temp = new char[n];
  strcpy(id_temp,arg[3]);

  int icompute = modify->find_compute(id_temp);
  delete [] id_temp;   // allocated with new[], so it must be released with delete[]

  if (modify->compute[icompute]->cudable == 0)
  {
    error->warning("Compute pressure/cuda temperature ID is not cudable! Try a temp/cuda style.");
    cudable = 0;
  }
}
|
||||
|
||||
double ComputePressureCuda::compute_scalar()
|
||||
{
|
||||
if(not temperature->cudable && cuda->finished_setup) cuda->downloadAll();
|
||||
ComputePressure::compute_scalar();
|
||||
}
|
||||
|
||||
void ComputePressureCuda::compute_vector()
|
||||
{
|
||||
if(not temperature->cudable && cuda->finished_setup) cuda->downloadAll();
|
||||
ComputePressure::compute_vector();
|
||||
}
|
|
@ -0,0 +1,63 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
#ifdef COMPUTE_CLASS
|
||||
|
||||
ComputeStyle(pressure/cuda,ComputePressureCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_COMPUTE_PRESSURE_CUDA_H
|
||||
#define LMP_COMPUTE_PRESSURE_CUDA_H
|
||||
|
||||
#include "compute_pressure.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// Compute style registered as "pressure/cuda". Derives from
// ComputePressure and re-declares compute_scalar() / compute_vector().
class ComputePressureCuda : public ComputePressure
{
 public:
  ComputePressureCuda(class LAMMPS *, int, char **);

  ~ComputePressureCuda() {}

  double compute_scalar();

  void compute_vector();

 private:
  // Set from lmp->cuda in the constructor.
  class Cuda *cuda;
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,212 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "mpi.h"
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include "compute_temp_cuda.h"
|
||||
#include "compute_temp_cuda_cu.h"
|
||||
#include "atom.h"
|
||||
#include "update.h"
|
||||
#include "force.h"
|
||||
#include "domain.h"
|
||||
#include "modify.h"
|
||||
#include "fix.h"
|
||||
#include "group.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
   Requires an active Cuda object and exactly three arguments. Declares a
   scalar and a 6-component vector result and allocates the host vector;
   the device mirrors are created later, on first use.
------------------------------------------------------------------------- */
ComputeTempCuda::ComputeTempCuda(LAMMPS *lmp, int narg, char **arg)
  : Compute(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (cuda == NULL) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  if (narg != 3) {
    error->all("Illegal compute temp/cuda command");
  }

  // result layout
  vector_flag = 1;
  scalar_flag = 1;
  size_vector = 6;
  extscalar = 0;
  extvector = 1;
  tempflag = 1;

  vector = new double[6];

  // device mirrors start out unallocated
  cu_t_vector = 0;
  cu_t_scalar = 0;
  cudable=true;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
   Releases the host result vector and both device mirror objects.
------------------------------------------------------------------------- */
ComputeTempCuda::~ComputeTempCuda()
{
  // host array allocated in the constructor
  delete [] vector;

  // device mirrors; deleting a null pointer is a no-op if never created
  delete cu_t_vector;
  delete cu_t_scalar;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void ComputeTempCuda::init()
|
||||
{
|
||||
fix_dof = 0;
|
||||
for (int i = 0; i < modify->nfix; i++)
|
||||
fix_dof += modify->fix[i]->dof(igroup);
|
||||
dof_compute();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void ComputeTempCuda::dof_compute()
|
||||
{
|
||||
double natoms = group->count(igroup);
|
||||
dof = domain->dimension * natoms;
|
||||
dof -= extra_dof + fix_dof;
|
||||
if (dof > 0.0) tfactor = force->mvv2e / (dof * force->boltz);
|
||||
else tfactor = 0.0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
double ComputeTempCuda::compute_scalar()
|
||||
{
|
||||
if(cuda->begin_setup)
|
||||
{
|
||||
if(not cu_t_vector) cu_t_vector = new cCudaData<double, ENERGY_FLOAT, x> (t_vector,6);
|
||||
if(not cu_t_scalar) cu_t_scalar = new cCudaData<double, ENERGY_FLOAT, x> (&t_scalar,1);
|
||||
invoked_scalar = update->ntimestep;
|
||||
Cuda_ComputeTempCuda_Scalar(&cuda->shared_data,groupbit,(ENERGY_FLOAT*) cu_t_scalar->dev_data());
|
||||
cu_t_scalar->download();
|
||||
}
|
||||
else
|
||||
{
|
||||
invoked_scalar = update->ntimestep;
|
||||
|
||||
double **v = atom->v;
|
||||
double *mass = atom->mass;
|
||||
double *rmass = atom->rmass;
|
||||
int *type = atom->type;
|
||||
int *mask = atom->mask;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
double t = 0.0;
|
||||
|
||||
if (rmass) {
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit)
|
||||
t += (v[i][0]*v[i][0] + v[i][1]*v[i][1] + v[i][2]*v[i][2]) * rmass[i];
|
||||
} else {
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit)
|
||||
t += (v[i][0]*v[i][0] + v[i][1]*v[i][1] + v[i][2]*v[i][2]) *
|
||||
mass[type[i]];
|
||||
}
|
||||
t_scalar=t;
|
||||
}
|
||||
|
||||
MPI_Allreduce(&t_scalar,&scalar,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
if (dynamic) dof_compute();
|
||||
scalar *= tfactor;
|
||||
if(scalar>1e15)
|
||||
{
|
||||
cuda->cu_v->download();
|
||||
cuda->cu_x->download();
|
||||
cuda->cu_type->download();
|
||||
double **v = atom->v;
|
||||
double **x = atom->x;
|
||||
printf("Out of v-range atoms: \n");
|
||||
for(int i=0;i<atom->nlocal;i++)
|
||||
if((v[i][0]*v[i][0] + v[i][1]*v[i][1] + v[i][2]*v[i][2])>1e5)
|
||||
printf("%i %i // %lf %lf %lf // %lf %lf %lf\n",atom->tag[i],atom->type[i],x[i][0], x[i][1], x[i][2],v[i][0], v[i][1], v[i][2]);
|
||||
error->all("Temperature out of range. Simulations will be abortet.\n");
|
||||
}
|
||||
return scalar;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void ComputeTempCuda::compute_vector()
|
||||
{
|
||||
int i;
|
||||
if(cuda->begin_setup)
|
||||
{
|
||||
if(not cu_t_vector) cu_t_vector = new cCudaData<double, ENERGY_FLOAT, x> (t_vector,6);
|
||||
if(not cu_t_scalar) cu_t_scalar = new cCudaData<double, ENERGY_FLOAT, x> (&t_scalar,1);
|
||||
|
||||
invoked_vector = update->ntimestep;
|
||||
|
||||
Cuda_ComputeTempCuda_Vector(&cuda->shared_data,groupbit,(ENERGY_FLOAT*) cu_t_vector->dev_data());
|
||||
cu_t_vector->download();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
invoked_vector = update->ntimestep;
|
||||
|
||||
double **v = atom->v;
|
||||
double *mass = atom->mass;
|
||||
double *rmass = atom->rmass;
|
||||
int *type = atom->type;
|
||||
int *mask = atom->mask;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
double massone,t[6];
|
||||
for (i = 0; i < 6; i++) t[i] = 0.0;
|
||||
|
||||
for (i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit) {
|
||||
if (rmass) massone = rmass[i];
|
||||
else massone = mass[type[i]];
|
||||
t[0] += massone * v[i][0]*v[i][0];
|
||||
t[1] += massone * v[i][1]*v[i][1];
|
||||
t[2] += massone * v[i][2]*v[i][2];
|
||||
t[3] += massone * v[i][0]*v[i][1];
|
||||
t[4] += massone * v[i][0]*v[i][2];
|
||||
t[5] += massone * v[i][1]*v[i][2];
|
||||
}
|
||||
|
||||
for (i = 0; i < 6; i++) t_vector[i]=t[i];
|
||||
}
|
||||
MPI_Allreduce(t_vector,vector,6,MPI_DOUBLE,MPI_SUM,world);
|
||||
for (i = 0; i < 6; i++) vector[i] *= force->mvv2e;
|
||||
}
|
|
@ -0,0 +1,75 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef COMPUTE_CLASS
|
||||
|
||||
ComputeStyle(temp/cuda,ComputeTempCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_COMPUTE_TEMP_CUDA_H
|
||||
#define LMP_COMPUTE_TEMP_CUDA_H
|
||||
|
||||
#include "compute.h"
|
||||
#include "cuda_data.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class ComputeTempCuda : public Compute {
|
||||
public:
|
||||
ComputeTempCuda(class LAMMPS *, int, char **);
|
||||
~ComputeTempCuda();
|
||||
void init();
|
||||
double compute_scalar();
|
||||
void compute_vector();
|
||||
|
||||
private:
|
||||
class Cuda *cuda;
|
||||
int fix_dof;
|
||||
double tfactor;
|
||||
|
||||
void dof_compute();
|
||||
double t_vector[6];
|
||||
double t_scalar;
|
||||
cCudaData<double , ENERGY_FLOAT , x>* cu_t_scalar;
|
||||
cCudaData<double , ENERGY_FLOAT , x>* cu_t_vector;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,357 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "mpi.h"
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include "compute_temp_partial_cuda.h"
|
||||
#include "compute_temp_partial_cuda_cu.h"
|
||||
#include "atom.h"
|
||||
#include "update.h"
|
||||
#include "force.h"
|
||||
#include "domain.h"
|
||||
#include "modify.h"
|
||||
#include "fix.h"
|
||||
#include "group.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
   constructor
   arg[3], arg[4], arg[5] = xflag, yflag, zflag, each 0 or 1, selecting
   which velocity components contribute to the temperature
   requires that the cuda accelerator object exists on the LAMMPS instance
------------------------------------------------------------------------- */

ComputeTempPartialCuda::ComputeTempPartialCuda(LAMMPS *lmp, int narg, char **arg) :
  Compute(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if(cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  if (narg != 6) error->all("Illegal compute temp/partial command");

  scalar_flag = vector_flag = 1;
  size_vector = 6;
  extscalar = 0;
  extvector = 1;
  tempflag = 1;
  tempbias = 1;

  xflag = atoi(arg[3]);
  yflag = atoi(arg[4]);
  zflag = atoi(arg[5]);

  // the flags are used as multipliers of the velocity components and are
  // summed to count degrees of freedom, so anything but 0/1 would silently
  // produce a wrong temperature
  if (xflag < 0 || xflag > 1 || yflag < 0 || yflag > 1 ||
      zflag < 0 || zflag > 1)
    error->all("Illegal compute temp/partial command");

  if (zflag && domain->dimension == 2)
    error->all("Compute temp/partial cannot use vz for 2d systemx");

  // per-atom bias storage is allocated on demand in remove_bias_all()
  maxbias = 0;
  vbiasall = NULL;

  vector = new double[6];

  // device mirrors are created lazily on first use
  cu_t_vector = 0;
  cu_t_scalar = 0;
  cu_vbiasall=NULL;
  cudable=true;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
   destructor: release host storage and the device mirrors
------------------------------------------------------------------------- */

ComputeTempPartialCuda::~ComputeTempPartialCuda()
{
  // host-side storage
  memory->destroy(vbiasall);
  delete [] vector;

  // device mirrors; deleting a null pointer is a no-op, so mirrors that
  // were never created need no special handling
  delete cu_t_vector;
  delete cu_t_scalar;
  delete cu_vbiasall;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void ComputeTempPartialCuda::init()
|
||||
{
|
||||
fix_dof = 0;
|
||||
for (int i = 0; i < modify->nfix; i++)
|
||||
fix_dof += modify->fix[i]->dof(igroup);
|
||||
dof_compute();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void ComputeTempPartialCuda::dof_compute()
|
||||
{
|
||||
double natoms = group->count(igroup);
|
||||
int nper = xflag+yflag+zflag;
|
||||
dof = nper * natoms;
|
||||
dof -= (1.0*nper/domain->dimension)*fix_dof + extra_dof;
|
||||
if (dof > 0) tfactor = force->mvv2e / (dof * force->boltz);
|
||||
else tfactor = 0.0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int ComputeTempPartialCuda::dof_remove(int i)
|
||||
{
|
||||
int nper = xflag+yflag+zflag;
|
||||
return (domain->dimension - nper);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
double ComputeTempPartialCuda::compute_scalar()
|
||||
{
|
||||
if(cuda->begin_setup)
|
||||
{
|
||||
if(not cu_t_vector) cu_t_vector = new cCudaData<double, ENERGY_FLOAT, x> (t_vector,6);
|
||||
if(not cu_t_scalar) cu_t_scalar = new cCudaData<double, ENERGY_FLOAT, x> (&t_scalar,1);
|
||||
invoked_scalar = update->ntimestep;
|
||||
Cuda_ComputeTempPartialCuda_Scalar(&cuda->shared_data,groupbit,(ENERGY_FLOAT*) cu_t_scalar->dev_data(),xflag,yflag,zflag);
|
||||
cu_t_scalar->download();
|
||||
}
|
||||
else
|
||||
{
|
||||
invoked_scalar = update->ntimestep;
|
||||
|
||||
double **v = atom->v;
|
||||
double *mass = atom->mass;
|
||||
double *rmass = atom->rmass;
|
||||
int *type = atom->type;
|
||||
int *mask = atom->mask;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
double t = 0.0;
|
||||
|
||||
if (rmass) {
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit)
|
||||
t += (xflag*v[i][0]*v[i][0] + yflag*v[i][1]*v[i][1] + zflag*v[i][2]*v[i][2]) * rmass[i];
|
||||
} else {
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit)
|
||||
t += (xflag*v[i][0]*v[i][0] + yflag*v[i][1]*v[i][1] + zflag*v[i][2]*v[i][2]) *
|
||||
mass[type[i]];
|
||||
}
|
||||
t_scalar=t;
|
||||
}
|
||||
|
||||
MPI_Allreduce(&t_scalar,&scalar,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
if (dynamic) dof_compute();
|
||||
scalar *= tfactor;
|
||||
if(scalar>1e15)
|
||||
{
|
||||
cuda->cu_v->download();
|
||||
cuda->cu_x->download();
|
||||
cuda->cu_type->download();
|
||||
double **v = atom->v;
|
||||
double **x = atom->x;
|
||||
printf("Out of v-range atoms: \n");
|
||||
for(int i=0;i<atom->nlocal;i++)
|
||||
if((v[i][0]*v[i][0] + v[i][1]*v[i][1] + v[i][2]*v[i][2])>1e5)
|
||||
printf("%i %i // %lf %lf %lf // %lf %lf %lf\n",atom->tag[i],atom->type[i],x[i][0], x[i][1], x[i][2],v[i][0], v[i][1], v[i][2]);
|
||||
error->all("Temperature out of range. Simulations will be abortet.\n");
|
||||
}
|
||||
return scalar;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void ComputeTempPartialCuda::compute_vector()
|
||||
{
|
||||
int i;
|
||||
if(cuda->begin_setup)
|
||||
{
|
||||
if(not cu_t_vector) cu_t_vector = new cCudaData<double, ENERGY_FLOAT, x> (t_vector,6);
|
||||
if(not cu_t_scalar) cu_t_scalar = new cCudaData<double, ENERGY_FLOAT, x> (&t_scalar,1);
|
||||
|
||||
invoked_vector = update->ntimestep;
|
||||
|
||||
Cuda_ComputeTempPartialCuda_Vector(&cuda->shared_data,groupbit,(ENERGY_FLOAT*) cu_t_vector->dev_data(),xflag,yflag,zflag);
|
||||
cu_t_vector->download();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
invoked_vector = update->ntimestep;
|
||||
|
||||
double **v = atom->v;
|
||||
double *mass = atom->mass;
|
||||
double *rmass = atom->rmass;
|
||||
int *type = atom->type;
|
||||
int *mask = atom->mask;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
double massone,t[6];
|
||||
for (i = 0; i < 6; i++) t[i] = 0.0;
|
||||
|
||||
for (i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit) {
|
||||
if (rmass) massone = rmass[i];
|
||||
else massone = mass[type[i]];
|
||||
t[0] += massone * xflag*v[i][0]*v[i][0];
|
||||
t[1] += massone * yflag*v[i][1]*v[i][1];
|
||||
t[2] += massone * zflag*v[i][2]*v[i][2];
|
||||
t[3] += massone * xflag*yflag*v[i][0]*v[i][1];
|
||||
t[4] += massone * xflag*zflag*v[i][0]*v[i][2];
|
||||
t[5] += massone * yflag*zflag*v[i][1]*v[i][2];
|
||||
}
|
||||
|
||||
for (i = 0; i < 6; i++) t_vector[i]=t[i];
|
||||
}
|
||||
MPI_Allreduce(t_vector,vector,6,MPI_DOUBLE,MPI_SUM,world);
|
||||
for (i = 0; i < 6; i++) vector[i] *= force->mvv2e;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remove velocity bias from atom I to leave thermal velocity
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ComputeTempPartialCuda::remove_bias(int i, double *v)
|
||||
{
|
||||
if (!xflag) {
|
||||
vbias[0] = v[0];
|
||||
v[0] = 0.0;
|
||||
}
|
||||
if (!yflag) {
|
||||
vbias[1] = v[1];
|
||||
v[1] = 0.0;
|
||||
}
|
||||
if (!zflag) {
|
||||
vbias[2] = v[2];
|
||||
v[2] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remove velocity bias from all atoms to leave thermal velocity
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ComputeTempPartialCuda::remove_bias_all()
|
||||
{
|
||||
double **v = atom->v;
|
||||
int *mask = atom->mask;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
if (nlocal > maxbias) {
|
||||
memory->destroy(vbiasall);
|
||||
maxbias = atom->nmax;
|
||||
memory->create(vbiasall,maxbias,3,"temp/partial:vbiasall");
|
||||
delete cu_vbiasall;
|
||||
cu_vbiasall = new cCudaData<double, V_FLOAT, yx> ((double*)vbiasall, atom->nmax, 3);
|
||||
}
|
||||
if(cuda->begin_setup)
|
||||
{
|
||||
Cuda_ComputeTempPartialCuda_RemoveBiasAll(&cuda->shared_data,groupbit,xflag,yflag,zflag,cu_vbiasall->dev_data());
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!xflag) {
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit) {
|
||||
vbiasall[i][0] = v[i][0];
|
||||
v[i][0] = 0.0;
|
||||
}
|
||||
}
|
||||
if (!yflag) {
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit) {
|
||||
vbiasall[i][1] = v[i][1];
|
||||
v[i][1] = 0.0;
|
||||
}
|
||||
}
|
||||
if (!zflag) {
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit) {
|
||||
vbiasall[i][2] = v[i][2];
|
||||
v[i][2] = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
add back in velocity bias to atom I removed by remove_bias()
|
||||
assume remove_bias() was previously called
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ComputeTempPartialCuda::restore_bias(int i, double *v)
|
||||
{
|
||||
if (!xflag) v[0] += vbias[0];
|
||||
if (!yflag) v[1] += vbias[1];
|
||||
if (!zflag) v[2] += vbias[2];
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
add back in velocity bias to all atoms removed by remove_bias_all()
|
||||
assume remove_bias_all() was previously called
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ComputeTempPartialCuda::restore_bias_all()
|
||||
{
|
||||
double **v = atom->v;
|
||||
int *mask = atom->mask;
|
||||
int nlocal = atom->nlocal;
|
||||
if(cuda->begin_setup)
|
||||
{
|
||||
Cuda_ComputeTempPartialCuda_RestoreBiasAll(&cuda->shared_data,groupbit,xflag,yflag,zflag,cu_vbiasall->dev_data());
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
if (!xflag) {
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit)
|
||||
v[i][0] += vbiasall[i][0];
|
||||
}
|
||||
if (!yflag) {
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit)
|
||||
v[i][1] += vbiasall[i][1];
|
||||
}
|
||||
if (!zflag) {
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit)
|
||||
v[i][2] += vbiasall[i][2];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
double ComputeTempPartialCuda::memory_usage()
|
||||
{
|
||||
double bytes = maxbias * sizeof(double);
|
||||
return bytes;
|
||||
}
|
|
@ -0,0 +1,83 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef COMPUTE_CLASS
|
||||
|
||||
ComputeStyle(temp/partial/cuda,ComputeTempPartialCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_COMPUTE_TEMP_PARTIAL_CUDA_H
|
||||
#define LMP_COMPUTE_TEMP_PARTIAL_CUDA_H
|
||||
|
||||
#include "compute.h"
|
||||
#include "cuda_data.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class ComputeTempPartialCuda : public Compute {
|
||||
public:
|
||||
ComputeTempPartialCuda(class LAMMPS *, int, char **);
|
||||
~ComputeTempPartialCuda();
|
||||
void init();
|
||||
double compute_scalar();
|
||||
void compute_vector();
|
||||
|
||||
int dof_remove(int);
|
||||
void remove_bias(int, double *);
|
||||
void remove_bias_all();
|
||||
void restore_bias(int, double *);
|
||||
void restore_bias_all();
|
||||
double memory_usage();
|
||||
|
||||
private:
|
||||
class Cuda *cuda;
|
||||
int xflag,yflag,zflag;
|
||||
int fix_dof;
|
||||
double tfactor;
|
||||
|
||||
void dof_compute();
|
||||
double t_vector[6];
|
||||
double t_scalar;
|
||||
cCudaData<double , ENERGY_FLOAT , x>* cu_t_scalar;
|
||||
cCudaData<double , ENERGY_FLOAT , x>* cu_t_vector;
|
||||
cCudaData<double, V_FLOAT, yx>* cu_vbiasall;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -230,7 +230,7 @@ void Cuda::accelerator(int narg, char** arg)
|
|||
{
|
||||
if(++i==narg)
|
||||
error->all("Invalid Options for 'accelerator' command. Expecting a string after 'suffix' option.");
|
||||
strcpy(lmp->asuffix,arg[i]);
|
||||
strcpy(lmp->suffix,arg[i]);
|
||||
}
|
||||
if(strcmp(arg[i],"overlap_comm")==0)
|
||||
{
|
||||
|
|
|
@ -29,12 +29,16 @@
|
|||
#include <algorithm>
|
||||
#include "cuda.h"
|
||||
#include "atom.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
CudaNeighList::CudaNeighList(LAMMPS *lmp, class NeighList* neigh_list) : Pointers(lmp)
|
||||
{
|
||||
cuda = lmp->cuda;
|
||||
if(cuda == NULL)
|
||||
error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
|
||||
|
||||
MYDBG(printf("# CUDA: CudaNeighList::cudaNeighList() ... start\n");)
|
||||
this->neigh_list = neigh_list;
|
||||
neigh_list->cuda_list=this;
|
||||
|
|
|
@ -54,6 +54,8 @@ enum{NO_REMAP,X_REMAP,V_REMAP}; // same as fix_deform.cpp
|
|||
// Constructor: keeps a pointer to the accelerator object and refuses to
// continue (via error->all) when it does not exist.
DomainCuda::DomainCuda(LAMMPS *lmp) : Domain(lmp)
{
  cuda = lmp->cuda;

  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
|
|
@ -0,0 +1,608 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Jim Shepherd (GA Tech) added SGI SCSL support
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "mpi.h"
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
#include "fft3d_cuda.h"
|
||||
#include "fft3d_cuda_cu.h"
|
||||
#include "remap.h"
|
||||
#include <ctime>
|
||||
#include "cuda_wrapper_cu.h"
|
||||
|
||||
#ifdef FFT_CUFFT
|
||||
#endif
|
||||
// The whole expansion is parenthesised so that the macros can be used
// inside larger expressions, e.g. MIN(a,b) + 1 or 2 * MAX(a,b).
// Note that each argument may be evaluated twice.
#define MIN(A,B) (((A) < (B)) ? (A) : (B))
#define MAX(A,B) (((A) > (B)) ? (A) : (B))
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Data layout for 3d FFTs:
|
||||
|
||||
data set of Nfast x Nmid x Nslow elements is owned by P procs
|
||||
on input, each proc owns a subsection of the elements
|
||||
on output, each proc will own a (possibly different) subsection
|
||||
my subsection must not overlap with any other proc's subsection,
|
||||
i.e. the union of all proc's input (or output) subsections must
|
||||
exactly tile the global Nfast x Nmid x Nslow data set
|
||||
when called from C, all subsection indices are
|
||||
C-style from 0 to N-1 where N = Nfast or Nmid or Nslow
|
||||
when called from F77, all subsection indices are
|
||||
F77-style from 1 to N where N = Nfast or Nmid or Nslow
|
||||
a proc can own 0 elements on input or output
|
||||
by specifying hi index < lo index
|
||||
on both input and output, data is stored contiguously on a processor
|
||||
with a fast-varying, mid-varying, and slow-varying index
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Perform 3d FFT
|
||||
|
||||
Arguments:
|
||||
in starting address of input data on this proc
|
||||
out starting address of where output data for this proc
|
||||
will be placed (can be same as in)
|
||||
flag 1 for forward FFT, -1 for inverse FFT
|
||||
plan plan returned by previous call to fft_3d_create_plan
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
// Run the 3d FFT of `plan` with cuFFT. The transform itself always works on
// the device buffers plan->cudata / plan->cudata2; `in` and `out` are only
// touched when more than one proc is involved and plan->init is set.
// The function body is empty when FFT_CUFFT is not defined.
// NOTE(review): the argument description preceding this function says
// "flag 1 for forward FFT", but the code below selects CUFFT_FORWARD for
// flag == -1 -- confirm which convention the callers rely on.
void fft_3d_cuda(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
{
#ifdef FFT_CUFFT
  plan->iterate++;

  const int nprocs = plan->nprocs;

  // pre-remap to prepare for 1st FFTs if needed
  // data = host buffer that holds the input to be uploaded
  FFT_DATA *data = in;
  if (nprocs > 1 && plan->pre_plan) {
    if (plan->init)
      remap_3d((double *) in, (double *) out, (double *) plan->scratch, plan->pre_plan);
    data = out;
  }

  // upload the host data and convert it to the device layout
  if (plan->init && nprocs > 1) {
    cudaError_t cpyerr = cudaSuccess;

    // the number of bytes to copy depends on the precision of FFT_FLOAT
    if (sizeof(FFT_FLOAT) == sizeof(double))
      cpyerr = cudaMemcpy((void*) (plan->cudata2), (void*) data, plan->cudatasize/2, cudaMemcpyHostToDevice);
    if (sizeof(FFT_FLOAT) == sizeof(float))
      cpyerr = cudaMemcpy((void*) (plan->cudata2), (void*) data, plan->cudatasize, cudaMemcpyHostToDevice);

    // a failed upload would otherwise feed stale device data into the FFT
    if (cpyerr != cudaSuccess) {printf("ErrorCudaMemcpy: %i\n",(int) cpyerr);exit(EXIT_FAILURE);}

    initfftdata((double*)plan->cudata2,(FFT_FLOAT*)plan->cudata,plan->nfast,plan->nmid,plan->nslow);
  }

  cufftResult retvalc;
  if (flag == -1)
    retvalc = cufft(plan->plan_3d, plan->cudata, plan->cudata2, CUFFT_FORWARD);
  else
    retvalc = cufft(plan->plan_3d, plan->cudata, plan->cudata2, CUFFT_INVERSE);

  if(retvalc!=CUFFT_SUCCESS) {printf("ErrorCUFFT: %i\n",retvalc);exit(EXIT_FAILURE);}

  FFTsyncthreads();
#endif
}
|
||||
/* ----------------------------------------------------------------------
|
||||
Create plan for performing a 3d FFT
|
||||
|
||||
Arguments:
|
||||
comm MPI communicator for the P procs which own the data
|
||||
nfast,nmid,nslow size of global 3d matrix
|
||||
in_ilo,in_ihi input bounds of data I own in fast index
|
||||
in_jlo,in_jhi input bounds of data I own in mid index
|
||||
in_klo,in_khi input bounds of data I own in slow index
|
||||
out_ilo,out_ihi output bounds of data I own in fast index
|
||||
out_jlo,out_jhi output bounds of data I own in mid index
|
||||
out_klo,out_khi output bounds of data I own in slow index
|
||||
scaled 0 = no scaling of result, 1 = scaling
|
||||
permute permutation in storage order of indices on output
|
||||
0 = no permutation
|
||||
1 = permute once = mid->fast, slow->mid, fast->slow
|
||||
2 = permute twice = slow->fast, fast->mid, mid->slow
|
||||
nbuf returns size of internal storage buffers used by FFT
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
struct fft_plan_3d *fft_3d_create_plan_cuda(
|
||||
MPI_Comm comm, int nfast, int nmid, int nslow,
|
||||
int in_ilo, int in_ihi, int in_jlo, int in_jhi,
|
||||
int in_klo, int in_khi,
|
||||
int out_ilo, int out_ihi, int out_jlo, int out_jhi,
|
||||
int out_klo, int out_khi,
|
||||
int scaled, int permute, int *nbuf,bool ainit)
|
||||
{
|
||||
#ifdef FFT_CUFFT
|
||||
struct fft_plan_3d *plan;
|
||||
int me,nprocs;
|
||||
int i,num,flag,remapflag,fftflag;
|
||||
int first_ilo,first_ihi,first_jlo,first_jhi,first_klo,first_khi;
|
||||
int second_ilo,second_ihi,second_jlo,second_jhi,second_klo,second_khi;
|
||||
int third_ilo,third_ihi,third_jlo,third_jhi,third_klo,third_khi;
|
||||
int out_size,first_size,second_size,third_size,copy_size,scratch_size;
|
||||
int np1,np2,ip1,ip2;
|
||||
int list[50];
|
||||
|
||||
// system specific variables
|
||||
|
||||
// query MPI info
|
||||
|
||||
MPI_Comm_rank(comm,&me);
|
||||
MPI_Comm_size(comm,&nprocs);
|
||||
|
||||
#ifndef FFT_CUFFT
|
||||
error->all("ERROR: Trying to use cuda fft without FFT_CUFFT set. Recompile with make option 'cufft=1'.");
|
||||
#endif
|
||||
// compute division of procs in 2 dimensions not on-processor
|
||||
bifactor_cuda(nprocs,&np1,&np2);
|
||||
ip1 = me % np1;
|
||||
ip2 = me/np1;
|
||||
|
||||
// in case of CUDA FFT every proc does the full FFT in order to avoid data transfers (the problem is other wise heavily bandwidth limited)
|
||||
|
||||
int ip1out = ip1;
|
||||
int ip2out = ip2;
|
||||
int np1out = np1;
|
||||
int np2out = np2;
|
||||
|
||||
ip1 = 0;
|
||||
ip2 = 0;
|
||||
np1 = 1;
|
||||
np2 = 1;
|
||||
|
||||
// allocate memory for plan data struct
|
||||
|
||||
plan = (struct fft_plan_3d *) malloc(sizeof(struct fft_plan_3d));
|
||||
if (plan == NULL) return NULL;
|
||||
plan->init=ainit;
|
||||
|
||||
// remap from initial distribution to layout needed for 1st set of 1d FFTs
|
||||
// not needed if all procs own entire fast axis initially
|
||||
// first indices = distribution after 1st set of FFTs
|
||||
|
||||
if (in_ilo == 0 && in_ihi == nfast-1)
|
||||
flag = 0;
|
||||
else
|
||||
flag = 1;
|
||||
|
||||
if(nprocs>1)flag=1;
|
||||
|
||||
MPI_Allreduce(&flag,&remapflag,1,MPI_INT,MPI_MAX,comm);
|
||||
|
||||
if (remapflag == 0) {
|
||||
first_ilo = in_ilo;
|
||||
first_ihi = in_ihi;
|
||||
first_jlo = in_jlo;
|
||||
first_jhi = in_jhi;
|
||||
first_klo = in_klo;
|
||||
first_khi = in_khi;
|
||||
plan->pre_plan = NULL;
|
||||
}
|
||||
else {
|
||||
first_ilo = 0;
|
||||
first_ihi = nfast - 1;
|
||||
first_jlo = ip1*nmid/np1;
|
||||
first_jhi = (ip1+1)*nmid/np1 - 1;
|
||||
first_klo = ip2*nslow/np2;
|
||||
first_khi = (ip2+1)*nslow/np2 - 1;
|
||||
int members=2;
|
||||
if(plan->init) members=1;
|
||||
plan->pre_plan =
|
||||
remap_3d_create_plan(comm,in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi,
|
||||
first_ilo,first_ihi,first_jlo,first_jhi,
|
||||
first_klo,first_khi,
|
||||
members,0,0,2);
|
||||
if (plan->pre_plan == NULL) return NULL;
|
||||
}
|
||||
|
||||
// 1d FFTs along fast axis
|
||||
|
||||
plan->length1 = nfast;
|
||||
plan->total1 = nfast * nmid * nslow;
|
||||
|
||||
// remap from 1st to 2nd FFT
|
||||
// choose which axis is split over np1 vs np2 to minimize communication
|
||||
// second indices = distribution after 2nd set of FFTs
|
||||
|
||||
second_ilo = ip1*nfast/np1;
|
||||
second_ihi = (ip1+1)*nfast/np1 - 1;
|
||||
second_jlo = 0;
|
||||
second_jhi = nmid - 1;
|
||||
second_klo = ip2*nslow/np2;
|
||||
second_khi = (ip2+1)*nslow/np2 - 1;
|
||||
plan->mid1_plan =
|
||||
remap_3d_create_plan(comm,
|
||||
first_ilo,first_ihi,first_jlo,first_jhi,
|
||||
first_klo,first_khi,
|
||||
second_ilo,second_ihi,second_jlo,second_jhi,
|
||||
second_klo,second_khi,
|
||||
2,1,0,2);
|
||||
if (plan->mid1_plan == NULL) return NULL;
|
||||
|
||||
// 1d FFTs along mid axis
|
||||
|
||||
plan->length2 = nmid;
|
||||
plan->total2 = nfast * nmid * nslow;
|
||||
|
||||
// remap from 2nd to 3rd FFT
|
||||
// if final distribution is permute=2 with all procs owning entire slow axis
|
||||
// then this remapping goes directly to final distribution
|
||||
// third indices = distribution after 3rd set of FFTs
|
||||
|
||||
flag=1;
|
||||
|
||||
MPI_Allreduce(&flag,&remapflag,1,MPI_INT,MPI_MAX,comm);
|
||||
|
||||
if (remapflag == 0) {
|
||||
third_ilo = out_ilo;
|
||||
third_ihi = out_ihi;
|
||||
third_jlo = out_jlo;
|
||||
third_jhi = out_jhi;
|
||||
third_klo = out_klo;
|
||||
third_khi = out_khi;
|
||||
}
|
||||
else {
|
||||
third_ilo = ip1*nfast/np1;
|
||||
third_ihi = (ip1+1)*nfast/np1 - 1;
|
||||
third_jlo = ip2*nmid/np2;
|
||||
third_jhi = (ip2+1)*nmid/np2 - 1;
|
||||
third_klo = 0;
|
||||
third_khi = nslow - 1;
|
||||
}
|
||||
|
||||
plan->mid2_plan =
|
||||
remap_3d_create_plan(comm,
|
||||
second_jlo,second_jhi,second_klo,second_khi,
|
||||
second_ilo,second_ihi,
|
||||
third_jlo,third_jhi,third_klo,third_khi,
|
||||
third_ilo,third_ihi,
|
||||
2,1,0,2);
|
||||
if (plan->mid2_plan == NULL) return NULL;
|
||||
|
||||
// 1d FFTs along slow axis
|
||||
|
||||
plan->length3 = nslow;
|
||||
plan->total3 = nfast * nmid * nslow;
|
||||
|
||||
// remap from 3rd FFT to final distribution
|
||||
// not needed if permute = 2 and third indices = out indices on all procs
|
||||
|
||||
flag=1;
|
||||
|
||||
MPI_Allreduce(&flag,&remapflag,1,MPI_INT,MPI_MAX,comm);
|
||||
|
||||
if (remapflag == 0)
|
||||
plan->post_plan = NULL;
|
||||
else {
|
||||
plan->post_plan =
|
||||
remap_3d_create_plan(comm,
|
||||
third_klo,third_khi,third_ilo,third_ihi,
|
||||
third_jlo,third_jhi,
|
||||
out_klo,out_khi,out_ilo,out_ihi,
|
||||
out_jlo,out_jhi,
|
||||
2,(permute+1)%3,0,2);
|
||||
if (plan->post_plan == NULL) return NULL;
|
||||
}
|
||||
|
||||
// configure plan memory pointers and allocate work space
|
||||
// out_size = amount of memory given to FFT by user
|
||||
// first/second/third_size = amount of memory needed after pre,mid1,mid2 remaps
|
||||
// copy_size = amount needed internally for extra copy of data
|
||||
// scratch_size = amount needed internally for remap scratch space
|
||||
// for each remap:
|
||||
// out space used for result if big enough, else require copy buffer
|
||||
// accumulate largest required remap scratch space
|
||||
|
||||
out_size = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * (out_khi-out_klo+1);
|
||||
first_size = (first_ihi-first_ilo+1) * (first_jhi-first_jlo+1) *
|
||||
(first_khi-first_klo+1);
|
||||
second_size = (second_ihi-second_ilo+1) * (second_jhi-second_jlo+1) *
|
||||
(second_khi-second_klo+1);
|
||||
third_size = (third_ihi-third_ilo+1) * (third_jhi-third_jlo+1) *
|
||||
(third_khi-third_klo+1);
|
||||
|
||||
plan->ihi_out=out_ihi;
|
||||
plan->ilo_out=out_ilo;
|
||||
plan->jhi_out=out_jhi;
|
||||
plan->jlo_out=out_jlo;
|
||||
plan->khi_out=out_khi;
|
||||
plan->klo_out=out_klo;
|
||||
|
||||
copy_size = 0;
|
||||
scratch_size = 0;
|
||||
|
||||
if (plan->pre_plan) {
|
||||
if (first_size <= out_size)
|
||||
plan->pre_target = 0;
|
||||
else {
|
||||
plan->pre_target = 1;
|
||||
copy_size = MAX(copy_size,first_size);
|
||||
}
|
||||
scratch_size = MAX(scratch_size,first_size);
|
||||
}
|
||||
|
||||
if (plan->mid1_plan) {
|
||||
if (second_size <= out_size)
|
||||
plan->mid1_target = 0;
|
||||
else {
|
||||
plan->mid1_target = 1;
|
||||
copy_size = MAX(copy_size,second_size);
|
||||
}
|
||||
scratch_size = MAX(scratch_size,second_size);
|
||||
}
|
||||
|
||||
if (plan->mid2_plan) {
|
||||
if (third_size <= out_size)
|
||||
plan->mid2_target = 0;
|
||||
else {
|
||||
plan->mid2_target = 1;
|
||||
copy_size = MAX(copy_size,third_size);
|
||||
}
|
||||
scratch_size = MAX(scratch_size,third_size);
|
||||
}
|
||||
|
||||
if (plan->post_plan)
|
||||
scratch_size = MAX(scratch_size,out_size);
|
||||
|
||||
*nbuf = copy_size + scratch_size;
|
||||
|
||||
if (copy_size) {
|
||||
plan->copy = (FFT_DATA *) malloc(copy_size*sizeof(FFT_DATA));
|
||||
if (plan->copy == NULL) return NULL;
|
||||
}
|
||||
else plan->copy = NULL;
|
||||
|
||||
if (scratch_size) {
|
||||
plan->scratch = (FFT_DATA *) malloc(scratch_size*sizeof(FFT_DATA));
|
||||
if (plan->scratch == NULL) return NULL;
|
||||
}
|
||||
else plan->scratch = NULL;
|
||||
|
||||
// system specific pre-computation of 1d FFT coeffs
|
||||
// and scaling normalization
|
||||
|
||||
cufftResult retvalc;
|
||||
int nfft = (in_ihi-in_ilo+1) * (in_jhi-in_jlo+1) *
|
||||
(in_khi-in_klo+1);
|
||||
int nfft_brick = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) *
|
||||
(out_khi-out_klo+1);
|
||||
|
||||
int nfft_both = MAX(nfft,nfft_brick);
|
||||
nfft_both=nfast*nmid*nslow;
|
||||
|
||||
plan->cudatasize=nfft_both*sizeof(FFT_DATA);
|
||||
|
||||
//retvalc=cufftPlan1d(&(plan->plan_fast), nfast, CUFFT_PLAN,plan->total1/nfast);
|
||||
//if(retvalc!=CUFFT_SUCCESS) printf("ErrorCUFFT1: %i\n",retvalc);
|
||||
plan->nfast=nfast;
|
||||
|
||||
//retvalc=cufftPlan1d(&(plan->plan_mid), nmid, CUFFT_PLAN,plan->total2/nmid);
|
||||
//if(retvalc!=CUFFT_SUCCESS) printf("ErrorCUFFT2: %i\n",retvalc);
|
||||
plan->nmid=nmid;
|
||||
|
||||
//retvalc=cufftPlan1d(&(plan->plan_slow), nslow, CUFFT_PLAN,plan->total3/nslow);
|
||||
//if(retvalc!=CUFFT_SUCCESS) printf("ErrorCUFFT3: %i\n",retvalc);
|
||||
plan->nslow=nslow;
|
||||
|
||||
retvalc=cufftPlan3d(&(plan->plan_3d), nslow,nmid,nfast, CUFFT_PLAN);
|
||||
if(retvalc!=CUFFT_SUCCESS) printf("ErrorCUFFT3: %i\n",retvalc);
|
||||
|
||||
plan->nprocs=nprocs;
|
||||
plan->me=me;
|
||||
if (scaled == 0)
|
||||
plan->scaled = 0;
|
||||
else {
|
||||
plan->scaled = 1;
|
||||
plan->norm = 1.0/(nfast*nmid*nslow);
|
||||
plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) *
|
||||
(out_khi-out_klo+1);
|
||||
}
|
||||
|
||||
plan->coretime=0;
|
||||
plan->iterate=0;
|
||||
plan->ffttime=0;
|
||||
return plan;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Destroy a 3d fft plan
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void fft_3d_destroy_plan_cuda(struct fft_plan_3d *plan)
{
#ifdef FFT_CUFFT
  // tear down the remap stages in pipeline order; a stage that was never
  // created is stored as NULL and is skipped
  struct remap_plan_3d *stages[4] = {plan->pre_plan, plan->mid1_plan,
                                     plan->mid2_plan, plan->post_plan};
  for (int s = 0; s < 4; s++)
    if (stages[s] != NULL) remap_3d_destroy_plan(stages[s]);

  // free(NULL) is a no-op, so the optional host buffers need no guard
  free(plan->copy);
  free(plan->scratch);

  // plan_fast/plan_mid/plan_slow are not destroyed here (their cufftDestroy
  // calls were already disabled); only the combined 3d handle is released
  cufftDestroy(plan->plan_3d);

  free(plan);
#endif
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
recursively divide n into small factors, return them in list
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* Split n into factors drawn from {2,3,5,7,11,13}, smallest first.
 *
 *   n     value to factor
 *   num   running count of factors; incremented once per factor written,
 *         so the caller must initialize it (normally to 0)
 *   list  output array; factors are appended starting at list[0]
 *
 * If a cofactor remains that none of the small primes divides, it is stored
 * unchanged as the final entry. n == 1 produces no output. Values n < 1 also
 * produce no output; the former recursive version never terminated for n == 0.
 */
void factor_cuda(int n, int *num, int *list)
{
  static const int small_primes[] = {2, 3, 5, 7, 11, 13};
  const int nprimes = sizeof(small_primes) / sizeof(small_primes[0]);

  if (n < 1) return;

  while (n != 1) {
    // pick the smallest listed prime that divides the remaining value
    int divisor = 0;
    for (int k = 0; k < nprimes; k++) {
      if (n % small_primes[k] == 0) {
        divisor = small_primes[k];
        break;
      }
    }

    if (divisor == 0) {
      // nothing divides: emit the remainder as-is and stop
      *list = n;
      (*num)++;
      return;
    }

    *list++ = divisor;
    (*num)++;
    n /= divisor;
  }
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
divide n into 2 factors of as equal size as possible
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* Write to factor1/factor2 the pair of integers whose product is n and whose
 * sizes are as close as possible, with *factor1 <= *factor2.
 * The outputs are left untouched when no candidate divisor is tried
 * (e.g. n == 0).
 */
void bifactor_cuda(int n, int *factor1, int *factor2)
{
  // begin at floor(sqrt(n)) and walk downward: the first exact divisor found
  // yields the most balanced split
  int candidate = static_cast<int> (sqrt((double) n));

  while (candidate > 0) {
    if (n % candidate == 0) {
      *factor1 = candidate;
      *factor2 = n / candidate;
      return;
    }
    --candidate;
  }
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
perform just the 1d FFTs needed by a 3d FFT, no data movement
|
||||
used for timing purposes
|
||||
|
||||
Arguments:
|
||||
in starting address of input data on this proc, all set to 0.0
|
||||
nsize size of in
|
||||
flag 1 for forward FFT, -1 for inverse FFT
|
||||
plan plan returned by previous call to fft_3d_create_plan
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* Timing helper: copy `data` to the plan's device buffer, run one in-place
 * cuFFT on it and copy the result back. No remaps and no scaling are done.
 *
 *   data   host buffer (input and output)
 *   nsize  size of data; NOTE(review): presumably counted in FFT_DATA
 *          elements, as in the host fft_1d_only - confirm against callers
 *   flag   -1 selects CUFFT_FORWARD, anything else CUFFT_INVERSE
 *   plan   plan whose cudata device buffer has already been set
 *
 * Compiles to an empty function when FFT_CUFFT is not defined.
 */
void fft_1d_only_cuda(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan)
{
#ifdef FFT_CUFFT
  // never touch more host memory than the caller says `data` holds
  size_t nbytes = plan->cudatasize;
  if (nsize >= 0 && (size_t) nsize * sizeof(FFT_DATA) < nbytes)
    nbytes = (size_t) nsize * sizeof(FFT_DATA);

  // the destination is the device pointer itself, not the address of the
  // struct member that stores it
  cudaError_t cuerr = cudaMemcpy((void*) plan->cudata, (const void*) data,
                                 nbytes, cudaMemcpyHostToDevice);
  if (cuerr != cudaSuccess)
    printf("ErrorCUDA memcpy to device: %s\n", cudaGetErrorString(cuerr));

  // same flag convention as the (disabled) per-dimension 1d transforms
  const int direction = (flag == -1) ? CUFFT_FORWARD : CUFFT_INVERSE;
  cufftResult retvalc = cufft(plan->plan_3d, plan->cudata, plan->cudata, direction);
  if (retvalc != CUFFT_SUCCESS) printf("ErrorCUFFT exec: %i\n", retvalc);

  cuerr = cudaMemcpy((void*) data, (const void*) plan->cudata,
                     nbytes, cudaMemcpyDeviceToHost);
  if (cuerr != cudaSuccess)
    printf("ErrorCUDA memcpy to host: %s\n", cudaGetErrorString(cuerr));
#endif
}
|
|
@ -0,0 +1,148 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
// User-settable FFT precision
|
||||
|
||||
// FFT_PRECISION = 1 is single-precision complex (4-byte real, 4-byte imag)
|
||||
// FFT_PRECISION = 2 is double-precision complex (8-byte real, 8-byte imag)
|
||||
#include "cuda_precision.h"
|
||||
//#define FFT_PRECISION 2
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
// Data types for single-precision complex
|
||||
|
||||
#if FFT_PRECISION_CU == 1

// host-side complex element (4-byte real, 4-byte imag); the layout is the
// same with and without cuFFT, so it is declared once for both cases
typedef struct {
  float re;
  float im;
} FFT_DATA;

#ifdef FFT_CUFFT
#include "cuda_runtime.h"
#include "cufft.h"

// single-precision cuFFT types and entry points behind precision-neutral names
typedef cufftComplex cufftData;
typedef cufftReal cufftDataInit;
#define cufft cufftExecC2C
#define cufftinit cufftExecR2C
#define CUFFT_PLAN CUFFT_C2C
#define CUFFT_PLAN_INIT CUFFT_R2C
#endif

#endif
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
// Data types for double-precision complex
|
||||
|
||||
#if FFT_PRECISION_CU == 2

// host-side complex element (8-byte real, 8-byte imag).
// Declared whether or not FFT_CUFFT is defined, as the single-precision
// section does, so that declarations taking FFT_DATA* still compile in a
// build without cuFFT.
typedef struct {
  double re;
  double im;
} FFT_DATA;

#ifdef FFT_CUFFT
#include "cuda_runtime.h"
#include "cufft.h"

// double-precision cuFFT types and entry points behind precision-neutral names
typedef cufftDoubleComplex cufftData;
typedef cufftDoubleReal cufftDataInit;
#define cufft cufftExecZ2Z
#define cufftinit cufftExecD2Z
#define CUFFT_PLAN CUFFT_Z2Z
#define CUFFT_PLAN_INIT CUFFT_D2Z
#endif

#endif
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
// details of how to do a 3d FFT
|
||||
|
||||
struct fft_plan_3d {
|
||||
struct remap_plan_3d *pre_plan; // remap from input -> 1st FFTs
|
||||
struct remap_plan_3d *mid1_plan; // remap from 1st -> 2nd FFTs
|
||||
struct remap_plan_3d *mid2_plan; // remap from 2nd -> 3rd FFTs
|
||||
struct remap_plan_3d *post_plan; // remap from 3rd FFTs -> output
|
||||
FFT_DATA *copy; // memory for remap results (if needed)
|
||||
FFT_DATA *scratch; // scratch space for remaps
|
||||
int total1,total2,total3; // # of 1st,2nd,3rd FFTs (times length)
|
||||
int length1,length2,length3; // length of 1st,2nd,3rd FFTs
|
||||
int pre_target; // where to put remap results
|
||||
int mid1_target,mid2_target;
|
||||
int scaled; // whether to scale FFT results
|
||||
int normnum; // # of values to rescale
|
||||
double norm; // normalization factor for rescaling
|
||||
|
||||
double coretime;
|
||||
double ffttime;
|
||||
int iterate;
|
||||
// system specific 1d FFT info
|
||||
|
||||
#ifdef FFT_CUFFT
|
||||
//CUdeviceptr cudata;
|
||||
cufftData* cudata;
|
||||
cufftData* cudata2;
|
||||
unsigned int cudatasize;
|
||||
cufftHandle plan_fast;
|
||||
cufftHandle plan_mid;
|
||||
cufftHandle plan_slow;
|
||||
cufftHandle plan_3d;
|
||||
int nfast;
|
||||
int nmid;
|
||||
int nslow;
|
||||
int ihi_out,ilo_out,jhi_out,jlo_out,khi_out,klo_out;
|
||||
int me,nprocs;
|
||||
#endif
|
||||
int init;
|
||||
};
|
||||
|
||||
// function prototypes
|
||||
|
||||
void fft_3d_destroy_plan_cuda(struct fft_plan_3d *);
|
||||
void factor_cuda(int, int *, int *);
|
||||
void bifactor_cuda(int, int *, int *);
|
||||
void fft_1d_only_cuda(FFT_DATA *, int, int, struct fft_plan_3d *);
|
||||
void fft_3d_cudaA(FFT_DATA *, FFT_DATA *, int, struct fft_plan_3d *);
|
||||
void fft_3d_cuda(FFT_DATA *, FFT_DATA *, int, struct fft_plan_3d *);
|
||||
struct fft_plan_3d *fft_3d_create_plan_cuda(MPI_Comm, int, int, int,
|
||||
int, int, int, int, int, int, int, int, int, int, int, int,
|
||||
int, int, int *,bool init);
|
|
@ -0,0 +1,111 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "mpi.h"
|
||||
#include "fft3d_wrap_cuda.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
FFT3dCuda::FFT3dCuda(LAMMPS *lmp, MPI_Comm comm, int nfast, int nmid, int nslow,
                     int in_ilo, int in_ihi, int in_jlo, int in_jhi,
                     int in_klo, int in_khi,
                     int out_ilo, int out_ihi, int out_jlo, int out_jhi,
                     int out_klo, int out_khi,
                     int scaled, int permute, int *nbuf,bool init) : Pointers(lmp)
{
  // use the cuFFT plan builder when compiled in, otherwise the one from
  // fft3d.h (which takes no init argument)
#ifdef FFT_CUFFT
  plan = fft_3d_create_plan_cuda(comm,nfast,nmid,nslow,
                                 in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi,
                                 out_ilo,out_ihi,out_jlo,out_jhi,out_klo,out_khi,
                                 scaled,permute,nbuf,init);
#else
  plan = fft_3d_create_plan(comm,nfast,nmid,nslow,
                            in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi,
                            out_ilo,out_ihi,out_jlo,out_jhi,out_klo,out_khi,
                            scaled,permute,nbuf);
#endif

  if (!plan) error->one("Could not create 3d FFT plan");
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
FFT3dCuda::~FFT3dCuda()
{
  // release the plan with the destroy routine of the backend that built it
#ifdef FFT_CUFFT
  fft_3d_destroy_plan_cuda(plan);
#else
  fft_3d_destroy_plan(plan);
#endif
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FFT3dCuda::compute(double *in, double *out, int flag)
|
||||
{
|
||||
#ifdef FFT_CUFFT
|
||||
fft_3d_cuda((FFT_DATA *) in,(FFT_DATA *) out,flag,plan);
|
||||
#endif
|
||||
#ifndef FFT_CUFFT
|
||||
fft_3d((FFT_DATA *) in,(FFT_DATA *) out,flag,plan);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FFT3dCuda::timing1d(double *in, int nsize, int flag)
|
||||
{
|
||||
#ifdef FFT_CUFFT
|
||||
fft_1d_only_cuda((FFT_DATA *) in,nsize,flag,plan);
|
||||
#endif
|
||||
#ifndef FFT_CUFFT
|
||||
fft_1d_only((FFT_DATA *) in,nsize,flag,plan);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef FFT_CUFFT
// store the two externally provided buffers in the plan; ownership stays
// with the caller as far as this function is concerned
void FFT3dCuda::set_cudata(void* cudata,void* cudata2)
{
  plan->cudata = static_cast<cufftData*>(cudata);
  plan->cudata2 = static_cast<cufftData*>(cudata2);
}
#endif
|
|
@ -0,0 +1,68 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef FFT3D_WRAP_CUDA_H_
|
||||
#define FFT3D_WRAP_CUDA_H_
|
||||
|
||||
#include "pointers.h"
|
||||
|
||||
#ifdef FFT_CUFFT
|
||||
#include "fft3d_cuda.h"
|
||||
#endif
|
||||
#ifndef FFT_CUFFT
|
||||
#include "fft3d.h"
|
||||
#endif
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class FFT3dCuda : protected Pointers {
|
||||
public:
|
||||
FFT3dCuda(class LAMMPS *, MPI_Comm,int,int,int,int,int,int,int,int,int,
|
||||
int,int,int,int,int,int,int,int,int *,bool);
|
||||
~FFT3dCuda();
|
||||
void compute(double *, double *, int);
|
||||
void timing1d(double *, int, int);
|
||||
|
||||
#ifdef FFT_CUFFT
|
||||
void set_cudata(void* cudata,void* cudata2);
|
||||
#endif
|
||||
private:
|
||||
struct fft_plan_3d *plan;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif /*FFT3D_WRAP_CUDA_H_*/
|
|
@ -0,0 +1,190 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include "fix_addforce_cuda.h"
|
||||
#include "fix_addforce_cuda_cu.h"
|
||||
#include "atom.h"
|
||||
#include "update.h"
|
||||
#include "respa.h"
|
||||
#include "error.h"
|
||||
#include "domain.h"
|
||||
#include "cuda.h"
|
||||
#include "memory.h"
|
||||
#include "cuda_modify_flags.h"
|
||||
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
FixAddForceCuda::FixAddForceCuda(LAMMPS *lmp, int narg, char **arg) :
  Fix(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (!cuda)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  if (narg < 6) error->all("Illegal fix addforce/cuda command");

  // this fix exposes one scalar and a 3-component vector
  scalar_flag = vector_flag = 1;
  size_vector = 3;
  global_freq = 1;
  extscalar = extvector = 1;

  // force components come from arguments 3..5
  xvalue = atof(arg[3]);
  yvalue = atof(arg[4]);
  zvalue = atof(arg[5]);

  // optional keyword/value pairs; only "region" is recognized

  iregion = -1;

  int pos = 6;
  while (pos < narg) {
    if (strcmp(arg[pos],"region") != 0) {
      error->all("Illegal fix addforce/cuda command");
    } else {
      if (pos+2 > narg) error->all("Illegal fix addforce/cuda command");
      iregion = domain->find_region(arg[pos+1]);
      if (iregion == -1) error->all("Fix addforce/cuda region ID does not exist");
      pos += 2;
    }
  }

  // a region that parsed successfully is still rejected
  if (iregion != -1) error->all("Error: fix addforce/cuda does not currently support 'region' option");

  force_flag = 0;
  for (int k = 0; k < 4; k++) foriginal[k] = 0.0;

  // the device mirror of foriginal is created later, in init()
  cu_foriginal = NULL;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int FixAddForceCuda::setmask()
|
||||
{
|
||||
int mask = 0;
|
||||
mask |= POST_FORCE_CUDA;
|
||||
mask |= THERMO_ENERGY_CUDA;
|
||||
mask |= POST_FORCE_RESPA;
|
||||
mask |= MIN_POST_FORCE_CUDA;
|
||||
return mask;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixAddForceCuda::init()
|
||||
{
|
||||
if(not cu_foriginal)
|
||||
cu_foriginal = new cCudaData<double, F_FLOAT, x> (foriginal,4);
|
||||
if (strcmp(update->integrate_style,"respa") == 0)
|
||||
nlevels_respa = ((Respa *) update->integrate)->nlevels;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixAddForceCuda::setup(int vflag)
|
||||
{
|
||||
MYDBG( printf("# CUDA: FixAddForceCuda::setup\n"); )
|
||||
|
||||
if (strcmp(update->integrate_style,"verlet") == 0)
|
||||
{
|
||||
Cuda_FixAddForceCuda_Init(&cuda->shared_data);
|
||||
cuda->cu_f->upload();
|
||||
post_force(vflag);
|
||||
cuda->cu_f->download();
|
||||
|
||||
}
|
||||
else {
|
||||
((Respa *) update->integrate)->copy_flevel_f(nlevels_respa-1);
|
||||
cuda->cu_f->download();
|
||||
post_force_respa(vflag,nlevels_respa-1,0);
|
||||
cuda->cu_f->upload();
|
||||
((Respa *) update->integrate)->copy_f_flevel(nlevels_respa-1);
|
||||
}
|
||||
MYDBG( printf("# CUDA: FixAddForceCuda::setup done\n"); )
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixAddForceCuda::min_setup(int vflag)
|
||||
{
|
||||
post_force(vflag);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixAddForceCuda::post_force(int vflag)
|
||||
{
|
||||
MYDBG( printf("# CUDA: FixAddForceCuda::postforce start\n"); )
|
||||
force_flag = 0;
|
||||
cu_foriginal->memset_device(0);
|
||||
Cuda_FixAddForceCuda_PostForce(&cuda->shared_data, groupbit, xvalue, yvalue,zvalue,(F_FLOAT*) cu_foriginal->dev_data());
|
||||
cu_foriginal->download();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixAddForceCuda::post_force_respa(int vflag, int ilevel, int iloop)
|
||||
{
|
||||
if (ilevel == nlevels_respa-1) post_force(vflag);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixAddForceCuda::min_post_force(int vflag)
|
||||
{
|
||||
post_force(vflag);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
potential energy of added force
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
double FixAddForceCuda::compute_scalar()
|
||||
{
|
||||
// only sum across procs one time
|
||||
|
||||
if (force_flag == 0) {
|
||||
MPI_Allreduce(foriginal,foriginal_all,4,MPI_DOUBLE,MPI_SUM,world);
|
||||
force_flag = 1;
|
||||
}
|
||||
return foriginal_all[0];
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
return components of total force on fix group before force was changed
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
double FixAddForceCuda::compute_vector(int n)
|
||||
{
|
||||
// only sum across procs one time
|
||||
|
||||
if (force_flag == 0) {
|
||||
MPI_Allreduce(foriginal,foriginal_all,4,MPI_DOUBLE,MPI_SUM,world);
|
||||
force_flag = 1;
|
||||
}
|
||||
return foriginal_all[n+1];
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(addforce/cuda,FixAddForceCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_ADD_FORCE_CUDA_H
|
||||
#define LMP_FIX_ADD_FORCE_CUDA_H
|
||||
|
||||
#include "fix.h"
|
||||
#include "cuda_data.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class FixAddForceCuda : public Fix {
|
||||
public:
|
||||
FixAddForceCuda(class LAMMPS *, int, char **);
|
||||
int setmask();
|
||||
void init();
|
||||
void setup(int);
|
||||
void min_setup(int);
|
||||
void post_force(int);
|
||||
void post_force_respa(int, int, int);
|
||||
void min_post_force(int);
|
||||
double compute_scalar();
|
||||
double compute_vector(int);
|
||||
|
||||
private:
|
||||
class Cuda *cuda;
|
||||
int iregion;
|
||||
double xvalue,yvalue,zvalue;
|
||||
double foriginal[4],foriginal_all[4];
|
||||
cCudaData<double , F_FLOAT , x>* cu_foriginal;
|
||||
int force_flag;
|
||||
int nlevels_respa;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,229 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
|
||||
#include "mpi.h"
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include "fix_aveforce_cuda.h"
|
||||
#include "fix_aveforce_cuda_cu.h"
|
||||
#include "atom.h"
|
||||
#include "update.h"
|
||||
#include "respa.h"
|
||||
#include "error.h"
|
||||
#include "domain.h"
|
||||
#include "cuda.h"
|
||||
#include "cuda_modify_flags.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* Parse "fix ID group aveforce/cuda fx fy fz [region ID]".
 * Each of fx/fy/fz is either a number or the literal NULL; NULL clears the
 * matching xflag/yflag/zflag and leaves the value at 0.0.
 * The region keyword is parsed but then rejected as unsupported.
 */
FixAveForceCuda::FixAveForceCuda(LAMMPS *lmp, int narg, char **arg) :
  Fix(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if(cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  // "< 6" rather than "!= 6", as in fix addforce/cuda: with "!= 6" the
  // optional-argument loop below could never run, so its region-specific
  // error messages were unreachable
  if (narg < 6) error->all("Illegal fix aveforce command");

  vector_flag = 1;
  size_vector = 3;
  global_freq = 1;
  extvector = 1;

  // give the values a defined content even for components given as NULL
  xflag = yflag = zflag = 1;
  xvalue = yvalue = zvalue = 0.0;
  if (strcmp(arg[3],"NULL") == 0) xflag = 0;
  else xvalue = atof(arg[3]);
  if (strcmp(arg[4],"NULL") == 0) yflag = 0;
  else yvalue = atof(arg[4]);
  if (strcmp(arg[5],"NULL") == 0) zflag = 0;
  else zvalue = atof(arg[5]);

  // optional args

  iregion = -1;

  int iarg = 6;
  while (iarg < narg) {
    if (strcmp(arg[iarg],"region") == 0) {
      if (iarg+2 > narg) error->all("Illegal fix aveforce command");
      iregion = domain->find_region(arg[iarg+1]);
      if (iregion == -1) error->all("Fix aveforce region ID does not exist");
      iarg += 2;
    } else error->all("Illegal fix aveforce command");
  }

  if(iregion!=-1) error->all("Error: fix aveforce/cuda does not currently support 'region' option");

  foriginal_all[0] = foriginal_all[1] = foriginal_all[2] = foriginal_all[3] = 0.0;
  foriginal[0] = foriginal[1] = foriginal[2] = foriginal[3] = 0.0;

  // the device mirror of foriginal is created later, in init()
  cu_foriginal = NULL;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int FixAveForceCuda::setmask()
|
||||
{
|
||||
int mask = 0;
|
||||
mask |= POST_FORCE_CUDA;
|
||||
mask |= POST_FORCE_RESPA;
|
||||
mask |= MIN_POST_FORCE_CUDA;
|
||||
return mask;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixAveForceCuda::init()
|
||||
{
|
||||
if(not cu_foriginal)
|
||||
cu_foriginal = new cCudaData<double, F_FLOAT, x> (foriginal,4);
|
||||
if (strcmp(update->integrate_style,"respa") == 0)
|
||||
nlevels_respa = ((Respa *) update->integrate)->nlevels;
|
||||
|
||||
// ncount = total # of atoms in group
|
||||
|
||||
int *mask = atom->mask;
|
||||
int nlocal = atom->nlocal;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixAveForceCuda::setup(int vflag)
|
||||
{
|
||||
if (strcmp(update->integrate_style,"verlet") == 0)
|
||||
{
|
||||
Cuda_FixAveForceCuda_Init(&cuda->shared_data);
|
||||
cuda->cu_f->upload();
|
||||
post_force(vflag);
|
||||
cuda->cu_f->download();
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
cuda->cu_f->download();
|
||||
for (int ilevel = 0; ilevel < nlevels_respa; ilevel++) {
|
||||
((Respa *) update->integrate)->copy_flevel_f(ilevel);
|
||||
post_force_respa(vflag,ilevel,0);
|
||||
((Respa *) update->integrate)->copy_f_flevel(ilevel);
|
||||
}
|
||||
cuda->cu_f->upload();
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixAveForceCuda::min_setup(int vflag)
|
||||
{
|
||||
post_force(vflag);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixAveForceCuda::post_force(int vflag)
|
||||
{
|
||||
// sum forces on participating atoms
|
||||
|
||||
cu_foriginal->memset_device(0);
|
||||
Cuda_FixAveForceCuda_PostForce_FOrg(&cuda->shared_data, groupbit,(F_FLOAT*) cu_foriginal->dev_data());
|
||||
cu_foriginal->download();
|
||||
|
||||
// average the force on participating atoms
|
||||
// add in requested amount
|
||||
|
||||
MPI_Allreduce(foriginal,foriginal_all,4,MPI_DOUBLE,MPI_SUM,world);
|
||||
int ncount = static_cast<int> (foriginal_all[3]);
|
||||
if (ncount == 0) return;
|
||||
double fave[3];
|
||||
fave[0] = foriginal_all[0]/ncount + xvalue;
|
||||
fave[1] = foriginal_all[1]/ncount + yvalue;
|
||||
fave[2] = foriginal_all[2]/ncount + zvalue;
|
||||
|
||||
// set force of all participating atoms to same value
|
||||
// only for active dimensions
|
||||
|
||||
Cuda_FixAveForceCuda_PostForce_Set(&cuda->shared_data, groupbit,xflag,yflag,zflag,fave[0],fave[1],fave[2]);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixAveForceCuda::post_force_respa(int vflag, int ilevel, int iloop)
|
||||
{
|
||||
// ave + extra force on outermost level
|
||||
// just ave on inner levels
|
||||
if (ilevel == nlevels_respa-1) post_force(vflag);
|
||||
else {
|
||||
cuda->cu_f->download();
|
||||
cuda->cu_mask->download();
|
||||
double **f = atom->f;
|
||||
int *mask = atom->mask;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
double foriginal[4];
|
||||
foriginal[0] = foriginal[1] = foriginal[2] = foriginal[3] = 0.0;
|
||||
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit) {
|
||||
foriginal[0] += f[i][0];
|
||||
foriginal[1] += f[i][1];
|
||||
foriginal[2] += f[i][2];
|
||||
foriginal[3] += 1;
|
||||
|
||||
}
|
||||
|
||||
MPI_Allreduce(foriginal,foriginal_all,4,MPI_DOUBLE,MPI_SUM,world);
|
||||
int ncount = static_cast<int> (foriginal_all[3]);
|
||||
if (ncount == 0) return;
|
||||
double fave[3];
|
||||
fave[0] = foriginal_all[0]/ncount;
|
||||
fave[1] = foriginal_all[1]/ncount;
|
||||
fave[2] = foriginal_all[2]/ncount;
|
||||
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit) {
|
||||
if (xflag) f[i][0] = fave[0];
|
||||
if (yflag) f[i][1] = fave[1];
|
||||
if (zflag) f[i][2] = fave[2];
|
||||
}
|
||||
cuda->cu_f->upload();
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixAveForceCuda::min_post_force(int vflag)
|
||||
{
|
||||
post_force(vflag);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
return components of total force on fix group before force was changed
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
double FixAveForceCuda::compute_vector(int n)
|
||||
{
|
||||
return foriginal_all[n];
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(aveforce/cuda,FixAveForceCuda)
|
||||
|
||||
#else
|
||||
|
||||
|
||||
#ifndef LMP_FIX_AVE_FORCE_CUDA_H
|
||||
#define LMP_FIX_AVE_FORCE_CUDA_H
|
||||
|
||||
#include "fix.h"
|
||||
#include "cuda_data.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class FixAveForceCuda : public Fix {
|
||||
public:
|
||||
FixAveForceCuda(class LAMMPS *, int, char **);
|
||||
int setmask();
|
||||
void init();
|
||||
void setup(int);
|
||||
void min_setup(int);
|
||||
void post_force(int);
|
||||
void post_force_respa(int, int, int);
|
||||
void min_post_force(int);
|
||||
double compute_vector(int);
|
||||
|
||||
private:
|
||||
class Cuda *cuda;
|
||||
int xflag,yflag,zflag,iregion;
|
||||
double xvalue,yvalue,zvalue;
|
||||
double foriginal_all[4];
|
||||
double foriginal[4];
|
||||
cCudaData<double , F_FLOAT , x>* cu_foriginal;
|
||||
int nlevels_respa;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,169 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cstring>
|
||||
#include "fix_enforce2d_cuda.h"
|
||||
#include "fix_enforce2d_cuda_cu.h"
|
||||
#include "atom.h"
|
||||
#include "update.h"
|
||||
#include "domain.h"
|
||||
#include "respa.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
#include "cuda_modify_flags.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// fix enforce2d/cuda takes no arguments beyond the 3 consumed here
// requires an active Cuda object on the LAMMPS instance

FixEnforce2DCuda::FixEnforce2DCuda(LAMMPS *lmp, int narg, char **arg) :
  Fix(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  if (narg != 3) {
    error->all("Illegal fix enforce2d command");
  }
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int FixEnforce2DCuda::setmask()
|
||||
{
|
||||
int mask = 0;
|
||||
mask |= POST_FORCE_CUDA;
|
||||
mask |= POST_FORCE_RESPA;
|
||||
mask |= MIN_POST_FORCE_CUDA;
|
||||
return mask;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixEnforce2DCuda::init()
|
||||
{
|
||||
if (domain->dimension == 3)
|
||||
error->all("Cannot use fix enforce2d/cuda with 3d simulation");
|
||||
if (atom->omega_flag)
|
||||
error->warning("Enforce2d/cuda does not support omega_flag on gpu yet. Will be handled on cpu.");
|
||||
|
||||
if (atom->angmom_flag)
|
||||
error->warning("Enforce2d/cuda does not support angmom_flag (angular momentum) on gpu yet. Will be handled on cpu.");
|
||||
|
||||
if (atom->torque_flag)
|
||||
error->warning("Enforce2d/cuda does not support torque_flag on gpu yet. Will be handled on cpu.");
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixEnforce2DCuda::setup(int vflag)
|
||||
{
|
||||
if (strcmp(update->integrate_style,"verlet") == 0)
|
||||
{
|
||||
Cuda_FixEnforce2dCuda_Init(&cuda->shared_data);
|
||||
cuda->cu_f->upload();
|
||||
cuda->cu_v->upload();
|
||||
post_force(vflag);
|
||||
cuda->cu_f->download();
|
||||
cuda->cu_v->download();
|
||||
}
|
||||
else {
|
||||
int nlevels_respa = ((Respa *) update->integrate)->nlevels;
|
||||
for (int ilevel = 0; ilevel < nlevels_respa; ilevel++) {
|
||||
((Respa *) update->integrate)->copy_flevel_f(ilevel);
|
||||
post_force_respa(vflag,ilevel,0);
|
||||
((Respa *) update->integrate)->copy_f_flevel(ilevel);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixEnforce2DCuda::min_setup(int vflag)
|
||||
{
|
||||
post_force(vflag);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixEnforce2DCuda::post_force(int vflag)
|
||||
{
|
||||
Cuda_FixEnforce2dCuda_PostForce(&cuda->shared_data, groupbit);
|
||||
|
||||
int *mask = atom->mask;
|
||||
int nlocal = atom->nlocal;
|
||||
if (igroup == atom->firstgroup) nlocal = atom->nfirst;
|
||||
|
||||
if (atom->omega_flag) {
|
||||
double **omega = atom->omega;
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit) {
|
||||
omega[i][0] = 0.0;
|
||||
omega[i][1] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
if (atom->angmom_flag) {
|
||||
double **angmom = atom->angmom;
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit) {
|
||||
angmom[i][0] = 0.0;
|
||||
angmom[i][1] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
if (atom->torque_flag) {
|
||||
double **torque = atom->torque;
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit) {
|
||||
torque[i][0] = 0.0;
|
||||
torque[i][1] = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixEnforce2DCuda::post_force_respa(int vflag, int ilevel, int iloop)
|
||||
{
|
||||
post_force(vflag);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixEnforce2DCuda::min_post_force(int vflag)
|
||||
{
|
||||
post_force(vflag);
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(enforce2d/cuda,FixEnforce2DCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_ENFORCE2D_CUDA_H
|
||||
#define LMP_FIX_ENFORCE2D_CUDA_H
|
||||
|
||||
#include "fix.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// CUDA variant of fix enforce2d, registered as style enforce2d/cuda

class FixEnforce2DCuda : public Fix {
 public:
  FixEnforce2DCuda(class LAMMPS *, int, char **);

  // call-back registration and one-time checks
  int setmask();
  void init();

  // setup hooks
  void setup(int);
  void min_setup(int);

  // per-step hooks
  void post_force(int);
  void post_force_respa(int, int, int);
  void min_post_force(int);

 private:
  class Cuda *cuda;     // copy of lmp->cuda, checked non-NULL in ctor
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,135 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include "fix_freeze_cuda.h"
|
||||
#include "fix_freeze_cuda_cu.h"
|
||||
#include "atom.h"
|
||||
#include "update.h"
|
||||
#include "respa.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
#include "memory.h"
|
||||
#include "modify.h"
|
||||
#include "cuda_modify_flags.h"
|
||||
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// fix freeze/cuda takes no arguments beyond the 3 consumed here
// requires an active Cuda object and the per-atom torque attribute

FixFreezeCuda::FixFreezeCuda(LAMMPS *lmp, int narg, char **arg) :
  Fix(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  if (narg != 3) {
    error->all("Illegal fix freeze command");
  }

  if (!atom->torque_flag) {
    error->all("Fix freeze requires atom attribute torque");
  }

  // this fix exposes a global 3-vector (see compute_vector)

  vector_flag = 1;
  size_vector = 3;
  global_freq = 1;
  extvector = 1;

  // no valid cross-proc sum yet; cu_foriginal is created lazily in init()

  force_flag = 0;
  for (int k = 0; k < 3; k++) foriginal[k] = 0.0;
  cu_foriginal = NULL;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int FixFreezeCuda::setmask()
|
||||
{
|
||||
int mask = 0;
|
||||
mask |= POST_FORCE_CUDA;
|
||||
mask |= THERMO_ENERGY_CUDA;
|
||||
return mask;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixFreezeCuda::init()
|
||||
{
|
||||
if(not cu_foriginal)
|
||||
cu_foriginal = new cCudaData<double, F_FLOAT, x> (foriginal,3);
|
||||
int count = 0;
|
||||
for (int i = 0; i < modify->nfix; i++)
|
||||
if (strcmp(modify->fix[i]->style,"freeze") == 0) count++;
|
||||
if (count > 1) error->all("More than one fix freeze");
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixFreezeCuda::setup(int vflag)
|
||||
{
|
||||
MYDBG( printf("# CUDA: FixFreezeCuda::setup\n"); )
|
||||
|
||||
if (strcmp(update->integrate_style,"verlet") == 0)
|
||||
{
|
||||
Cuda_FixFreezeCuda_Init(&cuda->shared_data);
|
||||
cuda->cu_f->upload();
|
||||
post_force(vflag);
|
||||
cuda->cu_f->download();
|
||||
|
||||
}
|
||||
|
||||
MYDBG( printf("# CUDA: FixFreezeCuda::setup done\n"); )
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixFreezeCuda::post_force(int vflag)
|
||||
{
|
||||
MYDBG( printf("# CUDA: FixFreezeCuda::postforce start\n"); )
|
||||
force_flag = 0;
|
||||
cu_foriginal->memset_device(0);
|
||||
Cuda_FixFreezeCuda_PostForce(&cuda->shared_data, groupbit, (F_FLOAT*) cu_foriginal->dev_data());
|
||||
cu_foriginal->download();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
return components of total force on fix group before force was changed
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
double FixFreezeCuda::compute_vector(int n)
|
||||
{
|
||||
// only sum across procs one time
|
||||
|
||||
if (force_flag == 0) {
|
||||
MPI_Allreduce(foriginal,foriginal_all,3,MPI_DOUBLE,MPI_SUM,world);
|
||||
force_flag = 1;
|
||||
}
|
||||
return foriginal_all[n+1];
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(freeze/cuda,FixFreezeCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_FREEZE_CUDA_H
|
||||
#define LMP_FIX_FREEZE_CUDA_H
|
||||
|
||||
#include "fix.h"
|
||||
#include "cuda_data.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class FixFreezeCuda : public Fix {
|
||||
public:
|
||||
FixFreezeCuda(class LAMMPS *, int, char **);
|
||||
int setmask();
|
||||
void init();
|
||||
void setup(int);
|
||||
void post_force(int);
|
||||
double compute_vector(int);
|
||||
|
||||
private:
|
||||
class Cuda *cuda;
|
||||
double foriginal[3],foriginal_all[3];
|
||||
cCudaData<double , F_FLOAT , x>* cu_foriginal;
|
||||
int force_flag;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,181 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include "fix_gravity_cuda.h"
|
||||
#include "fix_gravity_cuda_cu.h"
|
||||
#include "atom.h"
|
||||
#include "update.h"
|
||||
#include "domain.h"
|
||||
#include "respa.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
#include "cuda_modify_flags.h"
|
||||
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
// direction styles selected by arg[4] of the fix gravity/cuda command
enum {
  CHUTE,
  SPHERICAL,
  GRADIENT,
  VECTOR
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
   parse the fix gravity/cuda arguments
   arg[3] = magnitude, arg[4] = style keyword, remaining args per style:
     chute     : 1 angle
     spherical : phi theta
     gradient  : phi theta phigrad thetagrad
     vector    : xdir ydir zdir
   the unit direction (xgrav,ygrav,zgrav) is derived at the end
------------------------------------------------------------------------- */

FixGravityCuda::FixGravityCuda(LAMMPS *lmp, int narg, char **arg) :
  Fix(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  if (narg < 5) error->all("Illegal fix gravity command");

  time_depend = 1;

  magnitude = atof(arg[3]);

  // first identify the style keyword ...

  const char *keyword = arg[4];
  if (strcmp(keyword,"chute") == 0) style = CHUTE;
  else if (strcmp(keyword,"spherical") == 0) style = SPHERICAL;
  else if (strcmp(keyword,"gradient") == 0) style = GRADIENT;
  else if (strcmp(keyword,"vector") == 0) style = VECTOR;
  else error->all("Illegal fix gravity command");

  // ... then read its style-specific arguments

  switch (style) {
  case CHUTE:
    if (narg != 6) error->all("Illegal fix gravity command");
    phi = 0.0;
    theta = 180.0 - atof(arg[5]);
    break;
  case SPHERICAL:
    if (narg != 7) error->all("Illegal fix gravity command");
    phi = atof(arg[5]);
    theta = atof(arg[6]);
    break;
  case GRADIENT:
    if (narg != 9) error->all("Illegal fix gravity command");
    phi = atof(arg[5]);
    theta = atof(arg[6]);
    phigrad = atof(arg[7]);
    thetagrad = atof(arg[8]);
    break;
  case VECTOR:
    if (narg != 8) error->all("Illegal fix gravity command");
    xdir = atof(arg[5]);
    ydir = atof(arg[6]);
    zdir = atof(arg[7]);
    break;
  }

  const double PI = 4.0*atan(1.0);
  degree2rad = PI/180.0;

  const bool three_d = (domain->dimension == 3);

  if (style == VECTOR) {

    // normalize the user vector; in 2d only x,y enter the length

    if (three_d) {
      const double length = sqrt(xdir*xdir + ydir*ydir + zdir*zdir);
      xgrav = xdir/length;
      ygrav = ydir/length;
      zgrav = zdir/length;
    } else {
      const double length = sqrt(xdir*xdir + ydir*ydir);
      xgrav = xdir/length;
      ygrav = ydir/length;
      zgrav = 0.0;
    }

  } else {

    // CHUTE, SPHERICAL, GRADIENT: direction from the angles given in degrees

    const double theta_rad = degree2rad * theta;
    if (three_d) {
      const double phi_rad = degree2rad * phi;
      xgrav = sin(theta_rad) * cos(phi_rad);
      ygrav = sin(theta_rad) * sin(phi_rad);
      zgrav = cos(theta_rad);
    } else {
      xgrav = sin(theta_rad);
      ygrav = cos(theta_rad);
      zgrav = 0.0;
    }
  }

  // reference timestep used by the gradient style in post_force()

  time_origin = update->ntimestep;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int FixGravityCuda::setmask()
|
||||
{
|
||||
int mask = 0;
|
||||
mask |= POST_FORCE_CUDA;
|
||||
return mask;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixGravityCuda::init()
|
||||
{
|
||||
dt = update->dt;
|
||||
|
||||
xacc = magnitude*xgrav;
|
||||
yacc = magnitude*ygrav;
|
||||
zacc = magnitude*zgrav;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixGravityCuda::setup(int vflag)
|
||||
{
|
||||
MYDBG( printf("# CUDA: FixGravityCuda::setup\n"); )
|
||||
|
||||
if (strcmp(update->integrate_style,"verlet") == 0)
|
||||
{
|
||||
Cuda_FixGravityCuda_Init(&cuda->shared_data);
|
||||
cuda->cu_f->upload();
|
||||
post_force(vflag);
|
||||
cuda->cu_f->download();
|
||||
|
||||
}
|
||||
else {
|
||||
}
|
||||
MYDBG( printf("# CUDA: FixGravityCuda::setup done\n"); )
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixGravityCuda::post_force(int vflag)
|
||||
{
|
||||
// update direction of gravity vector if gradient style
|
||||
|
||||
if (style == GRADIENT) {
|
||||
if (domain->dimension == 3) {
|
||||
double phi_current = degree2rad *
|
||||
(phi + (update->ntimestep - time_origin)*dt*phigrad*360.0);
|
||||
double theta_current = degree2rad *
|
||||
(theta + (update->ntimestep - time_origin)*dt*thetagrad*360.0);
|
||||
xgrav = sin(theta_current) * cos(phi_current);
|
||||
ygrav = sin(theta_current) * sin(phi_current);
|
||||
zgrav = cos(theta_current);
|
||||
} else {
|
||||
double theta_current = degree2rad *
|
||||
(theta + (update->ntimestep - time_origin)*dt*thetagrad*360.0);
|
||||
xgrav = sin(theta_current);
|
||||
ygrav = cos(theta_current);
|
||||
}
|
||||
xacc = magnitude*xgrav;
|
||||
yacc = magnitude*ygrav;
|
||||
zacc = magnitude*zgrav;
|
||||
}
|
||||
|
||||
MYDBG( printf("# CUDA: FixGravityCuda::postforce start\n"); )
|
||||
Cuda_FixGravityCuda_PostForce(&cuda->shared_data, groupbit, xacc,yacc,zacc);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(gravity/cuda,FixGravityCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_GRAVITY_CUDA_H
|
||||
#define LMP_FIX_GRAVITY_CUDA_H
|
||||
|
||||
#include "fix.h"
|
||||
#include "cuda_data.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// CUDA variant of fix gravity, registered as style gravity/cuda

class FixGravityCuda : public Fix {
 public:
  FixGravityCuda(class LAMMPS *, int, char **);
  int setmask();
  void init();
  void setup(int);
  void post_force(int);

 private:
  class Cuda *cuda;         // copy of lmp->cuda, checked non-NULL in ctor
  int style;                // CHUTE, SPHERICAL, GRADIENT or VECTOR
  double magnitude,dt;      // arg[3] of the command; update->dt cached in init()
  double phi,theta,phigrad,thetagrad;   // angles and gradients read from the command
  double xdir,ydir,zdir;    // direction given with the vector style
  double xgrav,ygrav,zgrav,xacc,yacc,zacc;   // direction; direction * magnitude
  double degree2rad;        // PI/180
  int time_origin;          // update->ntimestep at construction
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,126 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef LMP_FIX_NH_CUDA_H
|
||||
#define LMP_FIX_NH_CUDA_H
|
||||
|
||||
#include "fix.h"
|
||||
#include "cuda_precision.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class FixNHCuda : public Fix {
|
||||
public:
|
||||
FixNHCuda(class LAMMPS *, int, char **);
|
||||
virtual ~FixNHCuda();
|
||||
int setmask();
|
||||
virtual void init();
|
||||
void setup(int);
|
||||
virtual void initial_integrate(int);
|
||||
virtual void final_integrate();
|
||||
void initial_integrate_respa(int, int, int);
|
||||
void final_integrate_respa(int, int);
|
||||
double compute_scalar();
|
||||
double compute_vector(int);
|
||||
void write_restart(FILE *);
|
||||
void restart(char *);
|
||||
int modify_param(int, char **);
|
||||
void reset_dt();
|
||||
|
||||
protected:
|
||||
class Cuda *cuda;
|
||||
int dimension,which;
|
||||
double dtv,dtf,dthalf,dt4,dt8,dto;
|
||||
double boltz,nktv2p,tdof;
|
||||
double vol0,t0;
|
||||
|
||||
double t_start,t_stop;
|
||||
double t_current,t_target;
|
||||
double t_freq;
|
||||
|
||||
int tstat_flag; // 1 if control T
|
||||
int pstat_flag; // 1 if control P
|
||||
|
||||
int pstyle,pcouple,allremap;
|
||||
int p_flag[6]; // 1 if control P on this dim, 0 if not
|
||||
double p_start[6],p_stop[6];
|
||||
double p_freq[6],p_target[6];
|
||||
double omega[6],omega_dot[6];
|
||||
double omega_mass[6];
|
||||
double p_current[6],dilation[6];
|
||||
double drag,tdrag_factor; // drag factor on particle thermostat
|
||||
double pdrag_factor; // drag factor on barostat
|
||||
double factor[6]; // velocity scaling due to barostat
|
||||
int kspace_flag; // 1 if KSpace invoked, 0 if not
|
||||
int nrigid; // number of rigid fixes
|
||||
int *rfix; // indices of rigid fixes
|
||||
|
||||
int nlevels_respa;
|
||||
double *step_respa;
|
||||
|
||||
char *id_temp,*id_press;
|
||||
class Compute *temperature,*pressure;
|
||||
int tflag,pflag;
|
||||
|
||||
double *eta,*eta_dot; // chain thermostat for particles
|
||||
double *eta_dotdot;
|
||||
double *eta_mass;
|
||||
int mtchain; // length of chain
|
||||
|
||||
double *etap; // chain thermostat for barostat
|
||||
double *etap_dot;
|
||||
double *etap_dotdot;
|
||||
double *etap_mass;
|
||||
int mpchain; // length of chain
|
||||
|
||||
int mtk_flag; // 0 if using Hoover barostat
|
||||
double mtk_term1,mtk_term2;
|
||||
int mtchain_default_flag;
|
||||
int pdim; // number of barostatted dims
|
||||
double mvv_current[3]; // diagonal of KE tensor
|
||||
double mtk_factor; // MTK factor
|
||||
double p_freq_max; // maximum barostat frequency
|
||||
|
||||
double p_hydro; // hydrostatic target pressure
|
||||
|
||||
int nc_tchain,nc_pchain;
|
||||
double factor_eta;
|
||||
double sigma[6]; // scaled target stress
|
||||
double fdev[6]; // deviatoric force on barostat
|
||||
int deviatoric_flag; // 0 if target stress tensor is hydrostatic
|
||||
double h0_inv[6]; // h_inv of reference (zero strain) box
|
||||
int nreset_h0; // interval for resetting h0
|
||||
|
||||
void couple();
|
||||
void couple_ke();
|
||||
void remap();
|
||||
void nhc_temp_integrate();
|
||||
void nhc_press_integrate();
|
||||
|
||||
virtual void nve_x(); // may be overwritten by child classes
|
||||
virtual void nve_v();
|
||||
virtual void nh_v_press();
|
||||
virtual void nh_v_temp();
|
||||
|
||||
void compute_sigma();
|
||||
void compute_deviatoric();
|
||||
double compute_strain_energy();
|
||||
void compute_press_target();
|
||||
void nh_omega_dot();
|
||||
|
||||
X_FLOAT triggerneighsq;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,71 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cstring>
|
||||
#include "fix_npt_cuda.h"
|
||||
#include "modify.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Construct the fix on top of FixNHCuda, require that both temperature and
// pressure control were requested, and register two computes with Modify:
// "<fix-ID>_temp" (style temp/cuda) and "<fix-ID>_press" (style pressure/cuda).
// tflag and pflag are set to 1 once the respective compute has been added.
FixNPTCuda::FixNPTCuda(LAMMPS *lmp, int narg, char **arg) :
  FixNHCuda(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  if (!tstat_flag) error->all("Temperature control must be used with fix npt");
  if (!pstat_flag) error->all("Pressure control must be used with fix npt");

  // temperature compute: id = fix-ID + "_temp"
  // group is "all" because pressure is always global (group all), so the
  // KE/temperature contribution it uses should cover group all as well
  // buffer size = strlen(id) + 5 chars of "_temp" + terminating NUL

  id_temp = new char[strlen(id) + 6];
  strcpy(id_temp,id);
  strcat(id_temp,"_temp");

  char *temp_args[3] = { id_temp, (char *) "all", (char *) "temp/cuda" };
  modify->add_compute(3,temp_args);
  tflag = 1;

  // pressure compute: id = fix-ID + "_press", group = all
  // id_temp is handed over as the 4th argument of the pressure compute
  // buffer size = strlen(id) + 6 chars of "_press" + terminating NUL

  id_press = new char[strlen(id) + 7];
  strcpy(id_press,id);
  strcat(id_press,"_press");

  char *press_args[4] = { id_press, (char *) "all",
                          (char *) "pressure/cuda", id_temp };
  modify->add_compute(4,press_args);
  pflag = 1;
}
|
|
@ -0,0 +1,36 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(npt/cuda,FixNPTCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_NPTCuda_H
|
||||
#define LMP_FIX_NPTCuda_H
|
||||
|
||||
#include "fix_nh_cuda.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// Fix style "npt/cuda". The class only contributes a constructor and an
// empty destructor; every other member comes from FixNHCuda.
class FixNPTCuda : public FixNHCuda
{
 public:
  FixNPTCuda(class LAMMPS *, int, char **);
  ~FixNPTCuda() { }
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,155 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include "fix_nve_cuda.h"
|
||||
#include "fix_nve_cuda_cu.h"
|
||||
#include "atom.h"
|
||||
#include "force.h"
|
||||
#include "update.h"
|
||||
#include "respa.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
#include "cuda_modify_flags.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Construct the fix: requires an active Cuda object on the LAMMPS instance,
// checks the argument count and flags the fix as a time integrator.
FixNVECuda::FixNVECuda(LAMMPS *lmp, int narg, char **arg) :
  Fix(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  // fewer than 3 arguments is rejected unless the style is "nve/sphere"
  const bool is_sphere_style = (strcmp(style,"nve/sphere") == 0);
  if (!is_sphere_style && narg < 3)
    error->all("Illegal fix nve command");

  time_integrate = 1;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int FixNVECuda::setmask()
|
||||
{
|
||||
int mask = 0;
|
||||
mask |= INITIAL_INTEGRATE_CUDA;
|
||||
mask |= FINAL_INTEGRATE_CUDA;
|
||||
// mask |= INITIAL_INTEGRATE_RESPA_CUDA;
|
||||
// mask |= FINAL_INTEGRATE_RESPA_CUDA;
|
||||
return mask;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixNVECuda::init()
|
||||
{
|
||||
dtv = update->dt;
|
||||
dtf = 0.5 * update->dt * force->ftm2v;
|
||||
|
||||
if (strcmp(update->integrate_style,"respa") == 0)
|
||||
step_respa = ((Respa *) update->integrate)->step;
|
||||
|
||||
triggerneighsq= cuda->shared_data.atom.triggerneighsq;
|
||||
cuda->neighbor_decide_by_integrator=1;
|
||||
Cuda_FixNVECuda_Init(&cuda->shared_data,dtv,dtf);
|
||||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
allow for both per-type and per-atom mass
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void FixNVECuda::initial_integrate(int vflag)
|
||||
{
|
||||
if(triggerneighsq!=cuda->shared_data.atom.triggerneighsq)
|
||||
{
|
||||
triggerneighsq= cuda->shared_data.atom.triggerneighsq;
|
||||
Cuda_FixNVECuda_Init(&cuda->shared_data,dtv,dtf);
|
||||
}
|
||||
int nlocal = atom->nlocal;
|
||||
if(igroup == atom->firstgroup) nlocal = atom->nfirst;
|
||||
|
||||
Cuda_FixNVECuda_InitialIntegrate(& cuda->shared_data, groupbit,nlocal);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixNVECuda::final_integrate()
|
||||
{
|
||||
int nlocal = atom->nlocal;
|
||||
if(igroup == atom->firstgroup) nlocal = atom->nfirst;
|
||||
|
||||
Cuda_FixNVECuda_FinalIntegrate(& cuda->shared_data, groupbit,nlocal);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixNVECuda::initial_integrate_respa(int vflag, int ilevel, int flag)
|
||||
{
|
||||
//this point should not be reached yet since RESPA is not supported
|
||||
if (flag) return; // only used by NPT,NPH
|
||||
|
||||
dtv = step_respa[ilevel];
|
||||
dtf = 0.5 * step_respa[ilevel] * force->ftm2v;
|
||||
|
||||
// innermost level - NVE update of v and x
|
||||
// all other levels - NVE update of v
|
||||
|
||||
if(ilevel == 0) initial_integrate(vflag);
|
||||
else final_integrate();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixNVECuda::final_integrate_respa(int ilevel, int iloop)
|
||||
{
|
||||
//this point should not be reached yet since RESPA is not supported
|
||||
dtf = 0.5 * step_respa[ilevel] * force->ftm2v;
|
||||
final_integrate();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixNVECuda::reset_dt()
|
||||
{
|
||||
dtv = update->dt;
|
||||
dtf = 0.5 * update->dt * force->ftm2v;
|
||||
Cuda_FixNVECuda_Init(&cuda->shared_data,dtv,dtf);
|
||||
}
|
|
@ -0,0 +1,63 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(nve/cuda,FixNVECuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_NVE_CUDA_H
|
||||
#define LMP_FIX_NVE_CUDA_H
|
||||
|
||||
#include "fix.h"
|
||||
#include "cuda_precision.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class FixNVECuda : public Fix
|
||||
{
|
||||
public:
|
||||
FixNVECuda(class LAMMPS *, int, char **);
|
||||
int setmask();
|
||||
virtual void init();
|
||||
virtual void initial_integrate(int);
|
||||
virtual void final_integrate();
|
||||
void initial_integrate_respa(int, int, int);
|
||||
void final_integrate_respa(int, int);
|
||||
void reset_dt();
|
||||
|
||||
X_FLOAT triggerneighsq;
|
||||
|
||||
protected:
|
||||
class Cuda *cuda;
|
||||
double dtv, dtf;
|
||||
double *step_respa;
|
||||
int mass_require;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,48 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cstring>
|
||||
#include "fix_nvt_cuda.h"
|
||||
#include "group.h"
|
||||
#include "modify.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Construct the fix on top of FixNHCuda, require temperature control without
// pressure control, and register the compute "<fix-ID>_temp" (style temp/cuda)
// on this fix's group. tflag is set to 1 once the compute has been added.
FixNVTCuda::FixNVTCuda(LAMMPS *lmp, int narg, char **arg) :
  FixNHCuda(lmp, narg, arg)
{
  if (!tstat_flag) error->all("Temperature control must be used with fix nvt");
  if (pstat_flag) error->all("Pressure control can not be used with fix nvt");

  // temperature compute: id = fix-ID + "_temp"
  // buffer size = strlen(id) + 5 chars of "_temp" + terminating NUL

  id_temp = new char[strlen(id) + 6];
  strcpy(id_temp,id);
  strcat(id_temp,"_temp");

  char *temp_args[3] = { id_temp, group->names[igroup], (char *) "temp/cuda" };
  modify->add_compute(3,temp_args);
  tflag = 1;
}
|
|
@ -0,0 +1,36 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(nvt/cuda,FixNVTCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_NVTCuda_H
|
||||
#define LMP_FIX_NVTCuda_H
|
||||
|
||||
#include "fix_nh_cuda.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// Fix style "nvt/cuda". The class only contributes a constructor and an
// empty destructor; every other member comes from FixNHCuda.
class FixNVTCuda : public FixNHCuda
{
 public:
  FixNVTCuda(class LAMMPS *, int, char **);
  ~FixNVTCuda() { }
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,181 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include "fix_set_force_cuda.h"
|
||||
#include "fix_set_force_cuda_cu.h"
|
||||
#include "atom.h"
|
||||
#include "update.h"
|
||||
#include "respa.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
#include "memory.h"
|
||||
#include "cuda_modify_flags.h"
|
||||
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Construct the fix from "fix ID group setforce/cuda fx fy fz".
//   arg[3..5]  force component per dimension; the literal "NULL" leaves that
//              component untouched (flag = 0), anything else is parsed by atof
// Errors out if CUDA acceleration is inactive or narg != 6.
FixSetForceCuda::FixSetForceCuda(LAMMPS *lmp, int narg, char **arg) :
  Fix(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if(cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  if (narg != 6) error->all("Illegal fix setforce/cuda command");

  // the fix exposes a global 3-vector
  vector_flag = 1;
  size_vector = 3;
  global_freq = 1;
  extvector = 1;

  // give the values a defined content even for "NULL" components:
  // post_force() passes all three by value regardless of the flags, and
  // reading an uninitialized double there would be undefined behavior
  xvalue = yvalue = zvalue = 0.0;

  flagx = flagy = flagz = 1;
  if (strcmp(arg[3],"NULL") == 0) flagx = 0;
  else xvalue = atof(arg[3]);
  if (strcmp(arg[4],"NULL") == 0) flagy = 0;
  else yvalue = atof(arg[4]);
  if (strcmp(arg[5],"NULL") == 0) flagz = 0;
  else zvalue = atof(arg[5]);

  force_flag = 0;
  foriginal[0] = foriginal[1] = foriginal[2] = 0.0;

  // the device mirror of foriginal is created lazily in init()
  cu_foriginal=NULL;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int FixSetForceCuda::setmask()
|
||||
{
|
||||
int mask = 0;
|
||||
mask |= POST_FORCE_CUDA;
|
||||
mask |= THERMO_ENERGY_CUDA;
|
||||
mask |= POST_FORCE_RESPA;
|
||||
mask |= MIN_POST_FORCE_CUDA;
|
||||
return mask;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixSetForceCuda::init()
|
||||
{
|
||||
if(not cu_foriginal)
|
||||
cu_foriginal = new cCudaData<double, F_FLOAT, x> (foriginal,3);
|
||||
if (strcmp(update->integrate_style,"respa") == 0)
|
||||
nlevels_respa = ((Respa *) update->integrate)->nlevels;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixSetForceCuda::setup(int vflag)
|
||||
{
|
||||
MYDBG( printf("# CUDA: FixSetForceCuda::setup\n"); )
|
||||
|
||||
if (strcmp(update->integrate_style,"verlet") == 0)
|
||||
{
|
||||
Cuda_FixSetForceCuda_Init(&cuda->shared_data);
|
||||
cuda->cu_f->upload();
|
||||
post_force(vflag);
|
||||
cuda->cu_f->download();
|
||||
|
||||
}
|
||||
else {
|
||||
((Respa *) update->integrate)->copy_flevel_f(nlevels_respa-1);
|
||||
cuda->cu_f->download();
|
||||
post_force_respa(vflag,nlevels_respa-1,0);
|
||||
cuda->cu_f->upload();
|
||||
((Respa *) update->integrate)->copy_f_flevel(nlevels_respa-1);
|
||||
}
|
||||
MYDBG( printf("# CUDA: FixSetForceCuda::setup done\n"); )
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixSetForceCuda::min_setup(int vflag)
|
||||
{
|
||||
post_force(vflag);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixSetForceCuda::post_force(int vflag)
|
||||
{
|
||||
MYDBG( printf("# CUDA: FixSetForceCuda::postforce start\n"); )
|
||||
force_flag = 0;
|
||||
cu_foriginal->memset_device(0);
|
||||
Cuda_FixSetForceCuda_PostForce(&cuda->shared_data, groupbit, xvalue, yvalue,zvalue,(F_FLOAT*) cu_foriginal->dev_data(),flagx,flagy,flagz);
|
||||
cu_foriginal->download();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixSetForceCuda::post_force_respa(int vflag, int ilevel, int iloop)
|
||||
{
|
||||
if (ilevel == nlevels_respa-1) post_force(vflag);
|
||||
else {
|
||||
cuda->cu_f->download();
|
||||
cuda->cu_mask->download();
|
||||
|
||||
double **f = atom->f;
|
||||
int *mask = atom->mask;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
foriginal[0] = foriginal[1] = foriginal[2] = 0.0;
|
||||
force_flag = 0;
|
||||
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit) {
|
||||
foriginal[0] += f[i][0];
|
||||
foriginal[1] += f[i][1];
|
||||
foriginal[2] += f[i][2];
|
||||
if (flagx) f[i][0] = 0.0;
|
||||
if (flagy) f[i][1] = 0.0;
|
||||
if (flagz) f[i][2] = 0.0;
|
||||
}
|
||||
cuda->cu_f->upload();
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixSetForceCuda::min_post_force(int vflag)
|
||||
{
|
||||
post_force(vflag);
|
||||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
return components of total force on fix group before force was changed
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
double FixSetForceCuda::compute_vector(int n)
|
||||
{
|
||||
// only sum across procs one time
|
||||
|
||||
if (force_flag == 0) {
|
||||
MPI_Allreduce(foriginal,foriginal_all,3,MPI_DOUBLE,MPI_SUM,world);
|
||||
force_flag = 1;
|
||||
}
|
||||
return foriginal_all[n+1];
|
||||
}
|
|
@ -0,0 +1,63 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(setforce/cuda,FixSetForceCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_SET_FORCE_CUDA_H
|
||||
#define LMP_FIX_SET_FORCE_CUDA_H
|
||||
|
||||
#include "fix.h"
|
||||
#include "cuda_data.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class FixSetForceCuda : public Fix {
|
||||
public:
|
||||
FixSetForceCuda(class LAMMPS *, int, char **);
|
||||
int setmask();
|
||||
void init();
|
||||
void setup(int);
|
||||
void min_setup(int);
|
||||
void post_force(int);
|
||||
void post_force_respa(int, int, int);
|
||||
void min_post_force(int);
|
||||
double compute_vector(int);
|
||||
|
||||
private:
|
||||
class Cuda *cuda;
|
||||
int flagx,flagy,flagz;
|
||||
double xvalue,yvalue,zvalue;
|
||||
double foriginal[3],foriginal_all[3];
|
||||
cCudaData<double , F_FLOAT , x>* cu_foriginal;
|
||||
int force_flag;
|
||||
int nlevels_respa;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,133 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(shake/cuda,FixShakeCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_SHAKE_CUDA_H
|
||||
#define LMP_FIX_SHAKE_CUDA_H
|
||||
|
||||
#include "fix.h"
|
||||
#include "cuda_data.h"
|
||||
#include "cuda_precision.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class FixShakeCuda : public Fix {
|
||||
public:
|
||||
FixShakeCuda(class LAMMPS *, int, char **);
|
||||
~FixShakeCuda();
|
||||
int setmask();
|
||||
void init();
|
||||
void setup(int);
|
||||
void pre_neighbor();
|
||||
void post_force(int);
|
||||
//void post_force_respa(int, int, int);
|
||||
|
||||
double memory_usage();
|
||||
void grow_arrays(int);
|
||||
void copy_arrays(int, int);
|
||||
void set_arrays(int);
|
||||
int pack_exchange(int, double *);
|
||||
int unpack_exchange(int, double *);
|
||||
int pack_comm(int, int *, double *, int, int *);
|
||||
void unpack_comm(int, int, double *);
|
||||
|
||||
int dof(int);
|
||||
void reset_dt();
|
||||
|
||||
double time_postforce;
|
||||
private:
|
||||
class Cuda *cuda;
|
||||
int me,nprocs;
|
||||
double PI;
|
||||
double tolerance; // SHAKE tolerance
|
||||
int max_iter; // max # of SHAKE iterations
|
||||
int output_every; // SHAKE stat output every so often
|
||||
int next_output; // timestep for next output
|
||||
|
||||
// settings from input command
|
||||
int *bond_flag,*angle_flag; // bond/angle types to constrain
|
||||
int *type_flag; // constrain bonds to these types
|
||||
double *mass_list; // constrain bonds to these masses
|
||||
int nmass; // # of masses in mass_list
|
||||
bool neighbor_step; // was neighboring done in this step -> need to run the Cuda_FixShake_Init
|
||||
|
||||
double *bond_distance,*angle_distance; // constraint distances
|
||||
cCudaData<double , X_FLOAT , xx >* cu_bond_distance;
|
||||
cCudaData<double , X_FLOAT , xx >* cu_angle_distance;
|
||||
|
||||
int ifix_respa; // rRESPA fix needed by SHAKE
|
||||
int nlevels_respa; // copies of needed rRESPA variables
|
||||
int *loop_respa;
|
||||
double *step_respa;
|
||||
|
||||
double **x,**v,**f; // local ptrs to atom class quantities
|
||||
double *mass,*rmass;
|
||||
int *type;
|
||||
int nlocal;
|
||||
// atom-based arrays
|
||||
int *shake_flag; // 0 if atom not in SHAKE cluster
|
||||
// 1 = size 3 angle cluster
|
||||
// 2,3,4 = size of bond-only cluster
|
||||
int **shake_atom; // global IDs of atoms in cluster
|
||||
// central atom is 1st
|
||||
// lowest global ID is 1st for size 2
|
||||
|
||||
int **shake_type; // bondtype of each bond in cluster
|
||||
// for angle cluster, 3rd value
|
||||
// is angletype
|
||||
double **xshake; // unconstrained atom coords
|
||||
cCudaData<int , int , xx >* cu_shake_flag;
|
||||
cCudaData<int , int , yx >* cu_shake_atom;
|
||||
cCudaData<int , int , yx >* cu_shake_type;
|
||||
cCudaData<double , X_FLOAT , xy >* cu_xshake;
|
||||
cCudaData<int , int , xx >* cu_list;
|
||||
cCudaData<double , ENERGY_FLOAT , xx >* cu_virial;
|
||||
int* countoccur;
|
||||
|
||||
int vflag; // virial flag
|
||||
double dtv,dtfsq; // timesteps for trial move
|
||||
double dtf_inner,dtf_innerhalf; // timesteps for rRESPA trial move
|
||||
|
||||
int *list; // list of clusters to SHAKE
|
||||
int nlist,maxlist; // size and max-size of list
|
||||
|
||||
// stat quantities
|
||||
int *b_count,*b_count_all; // counts for each bond type
|
||||
double *b_ave,*b_max,*b_min; // ave/max/min dist for each bond type
|
||||
double *b_ave_all,*b_max_all,*b_min_all; // MPI summing arrays
|
||||
int *a_count,*a_count_all; // ditto for angle types
|
||||
double *a_ave,*a_max,*a_min;
|
||||
double *a_ave_all,*a_max_all,*a_min_all;
|
||||
|
||||
void find_clusters();
|
||||
void swap_clusters(int i,int j);
|
||||
int masscheck(double);
|
||||
void unconstrained_update();
|
||||
void shake2(int);
|
||||
void shake3(int);
|
||||
void shake4(int);
|
||||
void shake3angle(int);
|
||||
void stats();
|
||||
int bondfind(int, int, int);
|
||||
int anglefind(int, int, int);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,220 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
#include "fix_temp_berendsen_cuda.h"
|
||||
#include "fix_temp_berendsen_cuda_cu.h"
|
||||
#include "atom.h"
|
||||
#include "force.h"
|
||||
#include "group.h"
|
||||
#include "update.h"
|
||||
#include "comm.h"
|
||||
#include "modify.h"
|
||||
#include "compute.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
#include "cuda_modify_flags.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
enum{NOBIAS,BIAS};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Construct the fix from "fix ID group temp/berendsen/cuda Tstart Tstop Tdamp".
// Requires an active Cuda object, exactly 6 arguments and a positive period.
// Registers the compute "<fix-ID>_temp" (style temp/cuda) on this fix's group
// and sets tflag to 1.
FixTempBerendsenCuda::FixTempBerendsenCuda(LAMMPS *lmp, int narg, char **arg) :
  Fix(lmp, narg, arg)
{
  cuda = lmp->cuda;
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  if (narg != 6) error->all("Illegal fix temp/berendsen/cuda command");

  // Berendsen thermostat should be applied every step
  nevery = 1;

  t_start = atof(arg[3]);
  t_stop = atof(arg[4]);
  t_period = atof(arg[5]);

  // error checks
  if (t_period <= 0.0) error->all("Fix temp/berendsen/cuda period must be > 0.0");

  // temperature compute: id = fix-ID + "_temp", compute group = fix group
  // buffer size = strlen(id) + 5 chars of "_temp" + terminating NUL

  id_temp = new char[strlen(id) + 6];
  strcpy(id_temp,id);
  strcat(id_temp,"_temp");

  char *temp_args[3] = { id_temp, group->names[igroup], (char *) "temp/cuda" };
  modify->add_compute(3,temp_args);
  tflag = 1;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Remove the temperature compute if this fix created it (tflag set) and free
// the ID string.
FixTempBerendsenCuda::~FixTempBerendsenCuda()
{
  if (tflag) {
    modify->delete_compute(id_temp);
  }

  delete [] id_temp;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int FixTempBerendsenCuda::setmask()
|
||||
{
|
||||
int mask = 0;
|
||||
mask |= END_OF_STEP_CUDA;
|
||||
return mask;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixTempBerendsenCuda::init()
|
||||
{
|
||||
int icompute = modify->find_compute(id_temp);
|
||||
if (icompute < 0)
|
||||
error->all("Temperature ID for fix temp/berendsen/cuda does not exist");
|
||||
temperature = modify->compute[icompute];
|
||||
if(not temperature->cudable)
|
||||
error->warning("Fix temp/berendsen/cuda uses non cudable temperature compute");
|
||||
if (temperature->tempbias) which = BIAS;
|
||||
else which = NOBIAS;
|
||||
|
||||
//temperature->init(); //not in original berendsen possible error?
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixTempBerendsenCuda::end_of_step()
|
||||
{
|
||||
double t_current;
|
||||
if(not temperature->cudable) {cuda->cu_x->download();cuda->cu_v->download();}
|
||||
t_current = temperature->compute_scalar();
|
||||
if (t_current == 0.0)
|
||||
error->all("Computed temperature for fix temp/berendsen/cuda cannot be 0.0");
|
||||
|
||||
double delta = update->ntimestep - update->beginstep;
|
||||
delta /= update->endstep - update->beginstep;
|
||||
t_target = t_start + delta * (t_stop-t_start);
|
||||
|
||||
// rescale velocities by lamda
|
||||
|
||||
double lamda = sqrt(1.0 + update->dt/t_period*(t_target/t_current - 1.0));
|
||||
|
||||
double **v = atom->v;
|
||||
int *mask = atom->mask;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
if (which == NOBIAS) {
|
||||
Cuda_FixTempBerendsenCuda_EndOfStep(&cuda->shared_data, groupbit,lamda);
|
||||
|
||||
} else {
|
||||
if(not temperature->cudable)
|
||||
{
|
||||
cuda->cu_x->download();cuda->cu_v->download();
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
if (mask[i] & groupbit) {
|
||||
temperature->remove_bias(i,v[i]);
|
||||
v[i][0] *= lamda;
|
||||
v[i][1] *= lamda;
|
||||
v[i][2] *= lamda;
|
||||
temperature->restore_bias(i,v[i]);
|
||||
}
|
||||
}
|
||||
cuda->cu_v->upload();
|
||||
}
|
||||
else
|
||||
{
|
||||
temperature->remove_bias_all();
|
||||
Cuda_FixTempBerendsenCuda_EndOfStep(&cuda->shared_data, groupbit,lamda);
|
||||
temperature->restore_bias_all();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int FixTempBerendsenCuda::modify_param(int narg, char **arg)
|
||||
{
|
||||
if (strcmp(arg[0],"temp") == 0) {
|
||||
if (narg < 2) error->all("Illegal fix_modify command");
|
||||
if (tflag) {
|
||||
modify->delete_compute(id_temp);
|
||||
tflag = 0;
|
||||
}
|
||||
delete [] id_temp;
|
||||
int n = strlen(arg[1]) + 1;
|
||||
id_temp = new char[n];
|
||||
strcpy(id_temp,arg[1]);
|
||||
|
||||
int icompute = modify->find_compute(id_temp);
|
||||
if (icompute < 0) error->all("Could not find fix_modify temperature ID");
|
||||
temperature = modify->compute[icompute];
|
||||
|
||||
if (temperature->tempflag == 0)
|
||||
error->all("Fix_modify temperature ID does not compute temperature");
|
||||
if (temperature->igroup != igroup && comm->me == 0)
|
||||
error->warning("Group for fix_modify temp != fix group");
|
||||
return 2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixTempBerendsenCuda::reset_target(double t_new)
|
||||
{
|
||||
t_start = t_stop = t_new;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(temp/berendsen/cuda,FixTempBerendsenCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_TEMP_BERENDSEN_CUDA_H
|
||||
#define LMP_FIX_TEMP_BERENDSEN_CUDA_H
|
||||
|
||||
#include "fix.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
// Berendsen thermostat fix for the USER-CUDA package
// (fix style temp/berendsen/cuda).
class FixTempBerendsenCuda : public Fix {
 public:
  FixTempBerendsenCuda(class LAMMPS *, int, char **);
  ~FixTempBerendsenCuda();

  int setmask();                      // callback mask of this fix
  void init();                        // resolve the temperature compute
  void end_of_step();                 // rescale velocities
  int modify_param(int, char **);     // fix_modify "temp" keyword
  void reset_target(double);          // set t_start = t_stop

 private:
  class Cuda *cuda;                   // taken from LAMMPS::cuda in the constructor
  int which;                          // NOBIAS or BIAS, set in init()

  double t_start;                     // target temperature at start of run
  double t_stop;                      // target temperature at end of run
  double t_target;                    // target computed in end_of_step()
  double t_period;                    // relaxation period, must be > 0

  char *id_temp;                      // ID of the temperature compute
  class Compute *temperature;         // the temperature compute itself
  int tflag;                          // 1 if this fix created the compute
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,222 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
#include "fix_temp_rescale_cuda.h"
|
||||
#include "fix_temp_rescale_cuda_cu.h"
|
||||
#include "atom.h"
|
||||
#include "force.h"
|
||||
#include "group.h"
|
||||
#include "update.h"
|
||||
#include "domain.h"
|
||||
#include "region.h"
|
||||
#include "comm.h"
|
||||
#include "modify.h"
|
||||
#include "compute.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
#include "cuda_modify_flags.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
// Values stored in 'which' by init(): BIAS when the temperature compute
// has its tempbias flag set, NOBIAS otherwise.
enum{NOBIAS,BIAS};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
   Parse "fix ID group temp/rescale/cuda N Tstart Tstop window fraction"
   (at least 8 arguments) and create the companion compute
   "<fix-ID>_temp" of style temp/cuda on the fix group.
------------------------------------------------------------------------- */
FixTempRescaleCuda::FixTempRescaleCuda(LAMMPS *lmp, int narg, char **arg) :
  Fix(lmp, narg, arg)
{
  // every /cuda style needs the Cuda object owned by the LAMMPS instance
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  if (narg < 8) {
    error->all("Illegal fix temp/rescale/cuda command");
  }

  // rescaling interval in timesteps
  nevery = atoi(arg[3]);
  if (nevery <= 0) {
    error->all("Illegal fix temp/rescale/cuda command");
  }

  // this fix exposes one scalar (see compute_scalar())
  scalar_flag = 1;
  global_freq = nevery;
  extscalar = 1;

  t_start  = atof(arg[4]);
  t_stop   = atof(arg[5]);
  t_window = atof(arg[6]);
  fraction = atof(arg[7]);

  // compute ID = fix ID + "_temp"; 6 = strlen("_temp") + terminating NUL
  id_temp = new char[strlen(id) + 6];
  strcpy(id_temp,id);
  strcat(id_temp,"_temp");

  // argument vector for the new compute: ID, group name, style
  char *compute_args[3];
  compute_args[0] = id_temp;
  compute_args[1] = group->names[igroup];
  compute_args[2] = (char *) "temp/cuda";
  modify->add_compute(3,compute_args);

  // remember that this fix created the compute (see destructor)
  tflag = 1;

  // accumulated energy change reported by compute_scalar()
  energy = 0.0;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
   Release the temperature compute (only if this fix created it) and the
   buffer holding its ID.
------------------------------------------------------------------------- */
FixTempRescaleCuda::~FixTempRescaleCuda()
{
  // tflag is cleared by modify_param() once a user-supplied compute is used
  if (tflag != 0) {
    modify->delete_compute(id_temp);
  }

  delete [] id_temp;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int FixTempRescaleCuda::setmask()
|
||||
{
|
||||
int mask = 0;
|
||||
mask |= END_OF_STEP_CUDA;
|
||||
mask |= THERMO_ENERGY_CUDA;
|
||||
return mask;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixTempRescaleCuda::init()
|
||||
{
|
||||
int icompute = modify->find_compute(id_temp);
|
||||
if (icompute < 0)
|
||||
error->all("Temperature ID for fix temp/rescale/cuda does not exist");
|
||||
temperature = modify->compute[icompute];
|
||||
if(not temperature->cudable)
|
||||
error->warning("Fix temp/rescale/cuda uses non cudable temperature compute");
|
||||
if (temperature->tempbias) which = BIAS;
|
||||
else which = NOBIAS;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixTempRescaleCuda::end_of_step()
|
||||
{
|
||||
double t_current;
|
||||
if(not temperature->cudable) {cuda->cu_x->download();cuda->cu_v->download();}
|
||||
t_current = temperature->compute_scalar();
|
||||
if (t_current == 0.0)
|
||||
error->all("Computed temperature for fix temp/rescale/cuda cannot be 0.0");
|
||||
|
||||
double delta = update->ntimestep - update->beginstep;
|
||||
delta /= update->endstep - update->beginstep;
|
||||
double t_target = t_start + delta * (t_stop-t_start);
|
||||
|
||||
// rescale velocity of appropriate atoms if outside window
|
||||
|
||||
if (fabs(t_current-t_target) > t_window) {
|
||||
t_target = t_current - fraction*(t_current-t_target);
|
||||
double factor = sqrt(t_target/t_current);
|
||||
double efactor = 0.5 * force->boltz * temperature->dof;
|
||||
|
||||
double **v = atom->v;
|
||||
int *mask = atom->mask;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
if (which == NOBIAS) {
|
||||
energy += (t_current-t_target) * efactor;
|
||||
|
||||
Cuda_FixTempRescaleCuda_EndOfStep(&cuda->shared_data, groupbit,factor);
|
||||
|
||||
} else if (which == BIAS) {
|
||||
energy += (t_current-t_target) * efactor;
|
||||
if(not temperature->cudable)
|
||||
{
|
||||
cuda->cu_x->download();cuda->cu_v->download();
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
if (mask[i] & groupbit) {
|
||||
temperature->remove_bias(i,v[i]);
|
||||
v[i][0] *= factor;
|
||||
v[i][1] *= factor;
|
||||
v[i][2] *= factor;
|
||||
temperature->restore_bias(i,v[i]);
|
||||
}
|
||||
}
|
||||
cuda->cu_v->upload();
|
||||
}
|
||||
else
|
||||
{
|
||||
temperature->remove_bias_all();
|
||||
Cuda_FixTempRescaleCuda_EndOfStep(&cuda->shared_data, groupbit,factor);
|
||||
temperature->restore_bias_all();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int FixTempRescaleCuda::modify_param(int narg, char **arg)
|
||||
{
|
||||
if (strcmp(arg[0],"temp") == 0) {
|
||||
if (narg < 2) error->all("Illegal fix_modify command");
|
||||
if (tflag) {
|
||||
modify->delete_compute(id_temp);
|
||||
tflag = 0;
|
||||
}
|
||||
delete [] id_temp;
|
||||
int n = strlen(arg[1]) + 1;
|
||||
id_temp = new char[n];
|
||||
strcpy(id_temp,arg[1]);
|
||||
|
||||
int icompute = modify->find_compute(id_temp);
|
||||
if (icompute < 0) error->all("Could not find fix_modify temperature ID");
|
||||
temperature = modify->compute[icompute];
|
||||
|
||||
if (temperature->tempflag == 0)
|
||||
error->all("Fix_modify temperature ID does not compute temperature");
|
||||
if (temperature->igroup != igroup && comm->me == 0)
|
||||
error->warning("Group for fix_modify temp != fix group");
|
||||
if(not temperature->cudable)
|
||||
error->warning("Fix temp/rescale/cuda uses non cudable temperature compute");
|
||||
return 2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixTempRescaleCuda::reset_target(double t_new)
|
||||
{
|
||||
t_start = t_stop = t_new;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
double FixTempRescaleCuda::compute_scalar()
|
||||
{
|
||||
return energy;
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(temp/rescale/cuda,FixTempRescaleCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef FIX_TEMP_RESCALE_CUDA_H
|
||||
#define FIX_TEMP_RESCALE_CUDA_H
|
||||
|
||||
#include "fix.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
// Velocity-rescaling thermostat fix for the USER-CUDA package
// (fix style temp/rescale/cuda).
class FixTempRescaleCuda : public Fix {
 public:
  FixTempRescaleCuda(class LAMMPS *, int, char **);
  ~FixTempRescaleCuda();

  int setmask();                      // callback mask of this fix
  void init();                        // resolve the temperature compute
  void end_of_step();                 // rescale velocities if outside window
  int modify_param(int, char **);     // fix_modify "temp" keyword
  void reset_target(double);          // set t_start = t_stop
  double compute_scalar();            // accumulated energy change

 private:
  class Cuda *cuda;                   // taken from LAMMPS::cuda in the constructor
  int which;                          // NOBIAS or BIAS, set in init()

  double t_start;                     // target temperature at start of run
  double t_stop;                      // target temperature at end of run
  double t_window;                    // tolerated deviation from the target

  double fraction;                    // part of the deviation removed per rescale
  double energy;                      // accumulated energy change
  double efactor;                     // NOTE(review): end_of_step() uses a local
                                      // instead of this member - confirm if unused

  char *id_temp;                      // ID of the temperature compute
  class Compute *temperature;         // the temperature compute itself
  int tflag;                          // 1 if this fix created the compute
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,237 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
#include "fix_temp_rescale_limit_cuda.h"
|
||||
#include "fix_temp_rescale_limit_cuda_cu.h"
|
||||
#include "atom.h"
|
||||
#include "force.h"
|
||||
#include "group.h"
|
||||
#include "update.h"
|
||||
#include "domain.h"
|
||||
#include "region.h"
|
||||
#include "comm.h"
|
||||
#include "modify.h"
|
||||
#include "compute.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
#include "cuda_modify_flags.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
// Two-argument minimum / maximum.  The whole expansion is parenthesized so
// the macros can be embedded in larger expressions (e.g. 2*MIN(a,b)).
// Each argument may be evaluated twice: do not pass expressions with
// side effects.
#define MIN(A,B) (((A) < (B)) ? (A) : (B))
#define MAX(A,B) (((A) > (B)) ? (A) : (B))
|
||||
|
||||
// Values stored in 'which' by init(): BIAS when the temperature compute
// has its tempbias flag set, NOBIAS otherwise.
enum{NOBIAS,BIAS};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
   Parse "fix ID group temp/rescale/limit/cuda N Tstart Tstop window
   fraction limit" (at least 9 arguments) and create the companion
   compute "<fix-ID>_temp" of style temp/cuda on the fix group.
------------------------------------------------------------------------- */
FixTempRescaleLimitCuda::FixTempRescaleLimitCuda(LAMMPS *lmp, int narg, char **arg) :
  Fix(lmp, narg, arg)
{
  // every /cuda style needs the Cuda object owned by the LAMMPS instance
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  if (narg < 9) {
    error->all("Illegal fix temp/rescale/limit/cuda command");
  }

  // rescaling interval in timesteps
  nevery = atoi(arg[3]);
  if (nevery <= 0) {
    error->all("Illegal fix temp/rescale/limit/cuda command");
  }

  // this fix exposes one scalar (see compute_scalar())
  scalar_flag = 1;
  global_freq = nevery;
  extscalar = 1;

  t_start  = atof(arg[4]);
  t_stop   = atof(arg[5]);
  t_window = atof(arg[6]);
  fraction = atof(arg[7]);

  // factor entering the per-component velocity cap in end_of_step()
  limit = atof(arg[8]);
  if (limit <= 1.0) {
    error->all("Illegal fix temp/rescale/limit/cuda command (limit must be > 1.0)");
  }

  // compute ID = fix ID + "_temp"; 6 = strlen("_temp") + terminating NUL
  id_temp = new char[strlen(id) + 6];
  strcpy(id_temp,id);
  strcat(id_temp,"_temp");

  // argument vector for the new compute: ID, group name, style
  char *compute_args[3];
  compute_args[0] = id_temp;
  compute_args[1] = group->names[igroup];
  compute_args[2] = (char *) "temp/cuda";
  modify->add_compute(3,compute_args);

  // remember that this fix created the compute (see destructor)
  tflag = 1;

  // accumulated energy change reported by compute_scalar()
  energy = 0.0;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
   Release the temperature compute (only if this fix created it) and the
   buffer holding its ID.
------------------------------------------------------------------------- */
FixTempRescaleLimitCuda::~FixTempRescaleLimitCuda()
{
  // tflag is cleared by modify_param() once a user-supplied compute is used
  if (tflag != 0) {
    modify->delete_compute(id_temp);
  }

  delete [] id_temp;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int FixTempRescaleLimitCuda::setmask()
|
||||
{
|
||||
int mask = 0;
|
||||
mask |= END_OF_STEP_CUDA;
|
||||
mask |= THERMO_ENERGY_CUDA;
|
||||
return mask;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixTempRescaleLimitCuda::init()
|
||||
{
|
||||
int icompute = modify->find_compute(id_temp);
|
||||
if (icompute < 0)
|
||||
error->all("Temperature ID for fix temp/rescale/limit/cuda does not exist");
|
||||
temperature = modify->compute[icompute];
|
||||
if(not temperature->cudable)
|
||||
error->warning("Fix temp/rescale/limit/cuda uses non cudable temperature compute");
|
||||
if (temperature->tempbias) which = BIAS;
|
||||
else which = NOBIAS;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixTempRescaleLimitCuda::end_of_step()
|
||||
{
|
||||
double t_current;
|
||||
if(not temperature->cudable) {cuda->cu_x->download();cuda->cu_v->download();}
|
||||
t_current = temperature->compute_scalar();
|
||||
if (t_current == 0.0)
|
||||
error->all("Computed temperature for fix temp/rescale/limit/cuda cannot be 0.0");
|
||||
|
||||
double delta = update->ntimestep - update->beginstep;
|
||||
delta /= update->endstep - update->beginstep;
|
||||
double t_target = t_start + delta * (t_stop-t_start);
|
||||
|
||||
// rescale velocity of appropriate atoms if outside window
|
||||
|
||||
if (fabs(t_current-t_target) > t_window) {
|
||||
t_target = t_current - fraction*(t_current-t_target);
|
||||
double factor = sqrt(t_target/t_current);
|
||||
double efactor = 0.5 * force->boltz * temperature->dof;
|
||||
|
||||
double **v = atom->v;
|
||||
int *mask = atom->mask;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
double massone;
|
||||
if(atom->rmass) massone = atom->rmass[0];
|
||||
else massone = atom->mass[0];
|
||||
|
||||
double current_limit=sqrt(limit*force->boltz*t_target*temperature->dof/massone/force->mvv2e);
|
||||
if (which == NOBIAS) {
|
||||
energy += (t_current-t_target) * efactor;
|
||||
|
||||
|
||||
Cuda_FixTempRescaleLimitCuda_EndOfStep(&cuda->shared_data, groupbit,factor,current_limit);
|
||||
|
||||
} else if (which == BIAS) {
|
||||
energy += (t_current-t_target) * efactor;
|
||||
if(not temperature->cudable)
|
||||
{
|
||||
cuda->cu_x->download();cuda->cu_v->download();
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
if (mask[i] & groupbit) {
|
||||
temperature->remove_bias(i,v[i]);
|
||||
double vx = v[i][0] * factor;
|
||||
double vy = v[i][1] * factor;
|
||||
double vz = v[i][2] * factor;
|
||||
v[i][0]=vx>0?MIN(vx,current_limit):MAX(vx,-current_limit);
|
||||
v[i][1]=vy>0?MIN(vy,current_limit):MAX(vy,-current_limit);
|
||||
v[i][2]=vz>0?MIN(vz,current_limit):MAX(vz,-current_limit);
|
||||
|
||||
temperature->restore_bias(i,v[i]);
|
||||
}
|
||||
}
|
||||
cuda->cu_v->upload();
|
||||
}
|
||||
else
|
||||
{
|
||||
temperature->remove_bias_all();
|
||||
Cuda_FixTempRescaleLimitCuda_EndOfStep(&cuda->shared_data, groupbit,factor,current_limit);
|
||||
temperature->restore_bias_all();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int FixTempRescaleLimitCuda::modify_param(int narg, char **arg)
|
||||
{
|
||||
if (strcmp(arg[0],"temp") == 0) {
|
||||
if (narg < 2) error->all("Illegal fix_modify command");
|
||||
if (tflag) {
|
||||
modify->delete_compute(id_temp);
|
||||
tflag = 0;
|
||||
}
|
||||
delete [] id_temp;
|
||||
int n = strlen(arg[1]) + 1;
|
||||
id_temp = new char[n];
|
||||
strcpy(id_temp,arg[1]);
|
||||
|
||||
int icompute = modify->find_compute(id_temp);
|
||||
if (icompute < 0) error->all("Could not find fix_modify temperature ID");
|
||||
temperature = modify->compute[icompute];
|
||||
|
||||
if (temperature->tempflag == 0)
|
||||
error->all("Fix_modify temperature ID does not compute temperature");
|
||||
if (temperature->igroup != igroup && comm->me == 0)
|
||||
error->warning("Group for fix_modify temp != fix group");
|
||||
if(not temperature->cudable)
|
||||
error->warning("Fix temp/rescale/limit/cuda uses non cudable temperature compute");
|
||||
return 2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixTempRescaleLimitCuda::reset_target(double t_new)
|
||||
{
|
||||
t_start = t_stop = t_new;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
double FixTempRescaleLimitCuda::compute_scalar()
|
||||
{
|
||||
return energy;
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(temp/rescale/limit/cuda,FixTempRescaleLimitCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef FIX_TEMP_RESCALE_LIMIT_CUDA_H
|
||||
#define FIX_TEMP_RESCALE_LIMIT_CUDA_H
|
||||
|
||||
#include "fix.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
// Velocity-rescaling thermostat with a per-component velocity cap for the
// USER-CUDA package (fix style temp/rescale/limit/cuda).
class FixTempRescaleLimitCuda : public Fix {
 public:
  FixTempRescaleLimitCuda(class LAMMPS *, int, char **);
  ~FixTempRescaleLimitCuda();

  int setmask();                      // callback mask of this fix
  void init();                        // resolve the temperature compute
  void end_of_step();                 // rescale and cap velocities
  int modify_param(int, char **);     // fix_modify "temp" keyword
  void reset_target(double);          // set t_start = t_stop
  double compute_scalar();            // accumulated energy change

 private:
  class Cuda *cuda;                   // taken from LAMMPS::cuda in the constructor
  int which;                          // NOBIAS or BIAS, set in init()

  double t_start;                     // target temperature at start of run
  double t_stop;                      // target temperature at end of run
  double t_window;                    // tolerated deviation from the target

  double fraction;                    // part of the deviation removed per rescale
  double energy;                      // accumulated energy change
  double efactor;                     // NOTE(review): end_of_step() uses a local
                                      // instead of this member - confirm if unused

  double limit;                       // factor in the velocity cap, must be > 1.0
  char *id_temp;                      // ID of the temperature compute
  class Compute *temperature;         // the temperature compute itself
  int tflag;                          // 1 if this fix created the compute
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,103 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "fix_viscous_cuda.h"
|
||||
#include "fix_viscous_cuda_cu.h"
|
||||
#include "atom.h"
|
||||
#include "update.h"
|
||||
#include "respa.h"
|
||||
#include "error.h"
|
||||
#include "cuda_modify_flags.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
   Argument parsing is left to the FixViscous base class; this constructor
   only binds the Cuda object and marks the device copy of gamma as not
   yet created.
------------------------------------------------------------------------- */
FixViscousCuda::FixViscousCuda(LAMMPS *lmp, int narg, char **arg) :
  FixViscous(lmp, narg, arg)
{
  // every /cuda style needs the Cuda object owned by the LAMMPS instance
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  // allocated lazily in setup()
  cu_gamma = NULL;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
   Free the cCudaData wrapper for gamma, if one was created.
------------------------------------------------------------------------- */
FixViscousCuda::~FixViscousCuda()
{
  // deleting a NULL pointer is a no-op, so no check is required
  delete cu_gamma;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int FixViscousCuda::setmask()
|
||||
{
|
||||
int mask = 0;
|
||||
mask |= POST_FORCE_CUDA;
|
||||
// mask |= POST_FORCE_RESPA;
|
||||
// mask |= MIN_POST_FORCE;
|
||||
return mask;
|
||||
}
|
||||
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixViscousCuda::setup(int vflag)
|
||||
{
|
||||
if(not cu_gamma)
|
||||
cu_gamma = new cCudaData<double, F_FLOAT, x> (gamma,atom->ntypes+1);
|
||||
Cuda_FixViscousCuda_Init(&cuda->shared_data);
|
||||
cu_gamma->upload();
|
||||
// if (strcmp(update->integrate_style,"verlet/cuda") == 0)
|
||||
post_force(vflag);
|
||||
/* else {
|
||||
((Respa *) update->integrate)->copy_flevel_f(nlevels_respa-1);
|
||||
post_force_respa(vflag,nlevels_respa-1,0);
|
||||
((Respa *) update->integrate)->copy_f_flevel(nlevels_respa-1);
|
||||
}*/
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixViscousCuda::min_setup(int vflag)
|
||||
{
|
||||
Cuda_FixViscousCuda_Init(&cuda->shared_data);
|
||||
post_force(vflag);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixViscousCuda::post_force(int vflag)
|
||||
{
|
||||
// apply drag force to atoms in group
|
||||
// direction is opposed to velocity vector
|
||||
// magnitude depends on atom type
|
||||
|
||||
Cuda_FixViscousCuda_PostForce(&cuda->shared_data, groupbit,cu_gamma->dev_data());
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(viscous/cuda,FixViscousCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_VISCOUS_CUDA_H
|
||||
#define LMP_FIX_VISCOUS_CUDA_H
|
||||
|
||||
#include "fix_viscous.h"
|
||||
#include "cuda_data.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class FixViscousCuda : public FixViscous {
|
||||
public:
|
||||
FixViscousCuda(class LAMMPS *, int, char **);
|
||||
~FixViscousCuda();
|
||||
int setmask();
|
||||
void setup(int);
|
||||
void min_setup(int);
|
||||
void post_force(int);
|
||||
cCudaData<double, F_FLOAT, x>* cu_gamma;
|
||||
|
||||
private:
|
||||
class Cuda *cuda;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -63,6 +63,8 @@ using namespace LAMMPS_NS;
|
|||
ModifyCuda::ModifyCuda(LAMMPS *lmp) : Modify(lmp)
|
||||
{
|
||||
cuda = lmp->cuda;
|
||||
if(cuda == NULL)
|
||||
error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
|
||||
|
||||
n_initial_integrate_cuda = 0;
|
||||
n_post_integrate_cuda = 0;
|
||||
|
|
|
@ -21,7 +21,6 @@
|
|||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef CUDA
|
||||
#include "neighbor_cuda.h"
|
||||
#include "neigh_list.h"
|
||||
#include "atom.h"
|
||||
|
@ -313,5 +312,4 @@ return;
|
|||
MYDBG(printf(" # CUDA::NeighFullNSQCuda ... end\n");)
|
||||
*/
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -36,6 +36,8 @@ enum{NSQ,BIN,MULTI}; // also in neigh_list.cpp
|
|||
/* ----------------------------------------------------------------------
   Bind the Cuda object of the LAMMPS instance; abort if CUDA
   acceleration is not active.
------------------------------------------------------------------------- */
NeighborCuda::NeighborCuda(LAMMPS *lmp) : Neighbor(lmp)
{
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
|
|
@ -0,0 +1,186 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
Contributing author: Paul Crozier (SNL)
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "pair_born_coul_long_cuda.h"
|
||||
#include "pair_born_coul_long_cuda_cu.h"
|
||||
#include "cuda_data.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "kspace.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "cuda_neigh_list.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
#define EWALD_F 1.12837917
|
||||
#define EWALD_P 0.3275911
|
||||
#define A1 0.254829592
|
||||
#define A2 -0.284496736
|
||||
#define A3 1.421413741
|
||||
#define A4 -1.453152027
|
||||
#define A5 1.061405429
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* Construct the CUDA variant of PairBornCoulLong.
   Takes the Cuda handle from the LAMMPS instance (error->all() if it is
   missing), flags the pair force as CUDA-capable in the shared data and
   calls cuda->setSystemParams(). */
PairBornCoulLongCuda::PairBornCoulLongCuda(LAMMPS *lmp) : PairBornCoulLong(lmp)
{
  cuda = lmp->cuda;
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  allocated2 = false;

  // Assigned later by init_list(); give it a defined value so compute()
  // never reads an indeterminate pointer.
  cuda_neigh_list = NULL;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->shared_data.pair.use_block_per_atom = 0;
  cuda->setSystemParams();
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remember pointer to arrays in cuda shared data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairBornCoulLongCuda::allocate()
|
||||
{
|
||||
if(! allocated) PairBornCoulLong::allocate();
|
||||
if(! allocated2)
|
||||
{
|
||||
allocated2 = true;
|
||||
cuda->shared_data.pair.cut = cut_lj;
|
||||
cuda->shared_data.pair.coeff1 = rhoinv;
|
||||
cuda->shared_data.pair.coeff2 = sigma;
|
||||
cuda->shared_data.pair.coeff3 = a;
|
||||
cuda->shared_data.pair.coeff4 = c;
|
||||
cuda->shared_data.pair.coeff5 = d;
|
||||
cuda->shared_data.pair.offset = offset;
|
||||
cuda->shared_data.pair.special_lj = force->special_lj;
|
||||
cuda->shared_data.pair.special_coul = force->special_coul;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairBornCoulLongCuda::compute(int eflag, int vflag)
|
||||
{
|
||||
MYDBG( printf("PairBornCoulLongCuda compute start\n"); fflush(stdout);)
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
if(eflag) cuda->cu_eng_vdwl->upload();
|
||||
if(eflag) cuda->cu_eng_coul->upload();
|
||||
if(vflag) cuda->cu_virial->upload();
|
||||
#ifdef CUDA_USE_BINNING
|
||||
Cuda_PairBornCoulLongCuda(& cuda->shared_data, eflag, vflag);
|
||||
#else
|
||||
Cuda_PairBornCoulLongCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
|
||||
#endif
|
||||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
{
|
||||
if(eflag) cuda->cu_eng_vdwl->download();
|
||||
if(eflag) cuda->cu_eng_coul->download();
|
||||
if(vflag) cuda->cu_virial->download();
|
||||
}
|
||||
MYDBG( printf("PairBornCoulLongCuda compute end\n"); fflush(stdout);)
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairBornCoulLongCuda::settings(int narg, char **arg)
|
||||
{
|
||||
PairBornCoulLong::settings(narg, arg);
|
||||
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairBornCoulLongCuda::coeff(int narg, char **arg)
|
||||
{
|
||||
PairBornCoulLong::coeff(narg, arg);
|
||||
allocate();
|
||||
}
|
||||
|
||||
void PairBornCoulLongCuda::init_style()
|
||||
{
|
||||
if (!atom->q_flag)
|
||||
error->all("Pair style born/coul/long requires atom attribute q");
|
||||
// request regular or rRESPA neighbor lists
|
||||
|
||||
int irequest;
|
||||
|
||||
if (strcmp(update->integrate_style,"respa") == 0) error->all("Integrate Style Respa is not supported by pair style buck/coul/long/cuda");
|
||||
|
||||
irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->cudable = 1;
|
||||
|
||||
|
||||
cut_coulsq = cut_coul * cut_coul;
|
||||
cuda->shared_data.pair.cut_coulsq_global=cut_coulsq;
|
||||
|
||||
if (force->kspace == NULL)
|
||||
error->all("Pair style is incompatible with KSpace style");
|
||||
g_ewald = force->kspace->g_ewald;
|
||||
cuda->shared_data.pair.g_ewald=g_ewald;
|
||||
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
|
||||
|
||||
|
||||
if(ncoultablebits) error->warning("# CUDA: You asked for the useage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n");
|
||||
}
|
||||
|
||||
void PairBornCoulLongCuda::init_list(int id, NeighList *ptr)
|
||||
{
|
||||
MYDBG(printf("# CUDA PairBornCoulLongCuda::init_list\n");)
|
||||
PairBornCoulLong::init_list(id, ptr);
|
||||
#ifndef CUDA_USE_BINNING
|
||||
// right now we can only handle verlet (id 0), not respa
|
||||
if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
|
||||
// see Neighbor::init() for details on lammps lists' logic
|
||||
#endif
|
||||
MYDBG(printf("# CUDA PairBornCoulLongCuda::init_list end\n");)
|
||||
}
|
||||
|
||||
void PairBornCoulLongCuda::ev_setup(int eflag, int vflag)
|
||||
{
|
||||
int maxeatomold=maxeatom;
|
||||
PairBornCoulLong::ev_setup(eflag,vflag);
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
|
||||
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(born/coul/long/cuda,PairBornCoulLongCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_BORN_COUL_LONG_CUDA_H
|
||||
#define LMP_PAIR_BORN_COUL_LONG_CUDA_H
|
||||
|
||||
#include "pair_born_coul_long.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairBornCoulLongCuda : public PairBornCoulLong
|
||||
{
|
||||
public:
|
||||
PairBornCoulLongCuda(class LAMMPS *);
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_list(int, class NeighList *);
|
||||
void init_style();
|
||||
void ev_setup(int eflag, int vflag);
|
||||
protected:
|
||||
class Cuda *cuda;
|
||||
void allocate();
|
||||
bool allocated2;
|
||||
class CudaNeighList* cuda_neigh_list;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,173 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
Contributing author: Paul Crozier (SNL)
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "pair_buck_coul_cut_cuda.h"
|
||||
#include "pair_buck_coul_cut_cuda_cu.h"
|
||||
#include "cuda_data.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "kspace.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "cuda_neigh_list.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* Construct the CUDA variant of PairBuckCoulCut.
   Takes the Cuda handle from the LAMMPS instance (error->all() if it is
   missing), flags the pair force as CUDA-capable in the shared data and
   calls cuda->setSystemParams(). */
PairBuckCoulCutCuda::PairBuckCoulCutCuda(LAMMPS *lmp) : PairBuckCoulCut(lmp)
{
  cuda = lmp->cuda;
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  allocated2 = false;

  // Assigned later by init_list(); give it a defined value so compute()
  // never reads an indeterminate pointer.
  cuda_neigh_list = NULL;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->shared_data.pair.use_block_per_atom = 0;
  cuda->setSystemParams();
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remember pointer to arrays in cuda shared data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairBuckCoulCutCuda::allocate()
|
||||
{
|
||||
if(! allocated) PairBuckCoulCut::allocate();
|
||||
if(! allocated2)
|
||||
{
|
||||
allocated2 = true;
|
||||
cuda->shared_data.pair.cut_coul = cut_coul;
|
||||
cuda->shared_data.pair.cut = cut_lj;
|
||||
cuda->shared_data.pair.coeff1 = rhoinv;
|
||||
cuda->shared_data.pair.coeff2 = buck1;
|
||||
cuda->shared_data.pair.coeff3 = buck2;
|
||||
cuda->shared_data.pair.coeff4 = a;
|
||||
cuda->shared_data.pair.coeff5 = c;
|
||||
cuda->shared_data.pair.offset = offset;
|
||||
cuda->shared_data.pair.special_lj = force->special_lj;
|
||||
cuda->shared_data.pair.special_coul = force->special_coul;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairBuckCoulCutCuda::compute(int eflag, int vflag)
|
||||
{
|
||||
MYDBG( printf("PairBuckCoulCutCuda compute start\n"); fflush(stdout);)
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
if(eflag) cuda->cu_eng_vdwl->upload();
|
||||
if(eflag) cuda->cu_eng_coul->upload();
|
||||
if(vflag) cuda->cu_virial->upload();
|
||||
|
||||
Cuda_PairBuckCoulCutCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
|
||||
|
||||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
{
|
||||
if(eflag) cuda->cu_eng_vdwl->download();
|
||||
if(eflag) cuda->cu_eng_coul->download();
|
||||
if(vflag) cuda->cu_virial->download();
|
||||
}
|
||||
MYDBG( printf("PairBuckCoulCutCuda compute end\n"); fflush(stdout);)
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairBuckCoulCutCuda::settings(int narg, char **arg)
|
||||
{
|
||||
PairBuckCoulCut::settings(narg, arg);
|
||||
cuda->shared_data.pair.cut_coul_global = (F_FLOAT) cut_coul_global;
|
||||
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairBuckCoulCutCuda::coeff(int narg, char **arg)
|
||||
{
|
||||
PairBuckCoulCut::coeff(narg, arg);
|
||||
allocate();
|
||||
}
|
||||
|
||||
void PairBuckCoulCutCuda::init_style()
|
||||
{
|
||||
if (!atom->q_flag)
|
||||
error->all("Pair style buck/coul/long requires atom attribute q");
|
||||
// request regular or rRESPA neighbor lists
|
||||
|
||||
int irequest;
|
||||
|
||||
if (strcmp(update->integrate_style,"respa") == 0) error->all("Integrate Style Respa is not supported by pair style buck/coul/long/cuda");
|
||||
|
||||
irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->cudable = 1;
|
||||
|
||||
|
||||
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
|
||||
|
||||
cuda->shared_data.pair.cut_coulsq_global=cut_coul_global * cut_coul_global;
|
||||
|
||||
if(ncoultablebits) error->warning("# CUDA: You asked for the useage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n");
|
||||
}
|
||||
|
||||
void PairBuckCoulCutCuda::init_list(int id, NeighList *ptr)
|
||||
{
|
||||
MYDBG(printf("# CUDA PairBuckCoulCutCuda::init_list\n");)
|
||||
PairBuckCoulCut::init_list(id, ptr);
|
||||
#ifndef CUDA_USE_BINNING
|
||||
// right now we can only handle verlet (id 0), not respa
|
||||
if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
|
||||
// see Neighbor::init() for details on lammps lists' logic
|
||||
#endif
|
||||
MYDBG(printf("# CUDA PairBuckCoulCutCuda::init_list end\n");)
|
||||
}
|
||||
|
||||
void PairBuckCoulCutCuda::ev_setup(int eflag, int vflag)
|
||||
{
|
||||
int maxeatomold=maxeatom;
|
||||
PairBuckCoulCut::ev_setup(eflag,vflag);
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
|
||||
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(buck/coul/cut/cuda,PairBuckCoulCutCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_BUCK_COUL_CUT_CUDA_H
|
||||
#define LMP_PAIR_BUCK_COUL_CUT_CUDA_H
|
||||
|
||||
#include "pair_buck_coul_cut.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairBuckCoulCutCuda : public PairBuckCoulCut
|
||||
{
|
||||
public:
|
||||
PairBuckCoulCutCuda(class LAMMPS *);
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_list(int, class NeighList *);
|
||||
void init_style();
|
||||
void ev_setup(int eflag, int vflag);
|
||||
protected:
|
||||
class Cuda *cuda;
|
||||
void allocate();
|
||||
bool allocated2;
|
||||
class CudaNeighList* cuda_neigh_list;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,184 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
Contributing author: Paul Crozier (SNL)
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "pair_buck_coul_long_cuda.h"
|
||||
#include "pair_buck_coul_long_cuda_cu.h"
|
||||
#include "cuda_data.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "kspace.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "cuda_neigh_list.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
#define EWALD_F 1.12837917
|
||||
#define EWALD_P 0.3275911
|
||||
#define A1 0.254829592
|
||||
#define A2 -0.284496736
|
||||
#define A3 1.421413741
|
||||
#define A4 -1.453152027
|
||||
#define A5 1.061405429
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* Construct the CUDA variant of PairBuckCoulLong.
   Takes the Cuda handle from the LAMMPS instance (error->all() if it is
   missing), flags the pair force as CUDA-capable in the shared data and
   calls cuda->setSystemParams(). */
PairBuckCoulLongCuda::PairBuckCoulLongCuda(LAMMPS *lmp) : PairBuckCoulLong(lmp)
{
  cuda = lmp->cuda;
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  allocated2 = false;

  // Assigned later by init_list(); give it a defined value so compute()
  // never reads an indeterminate pointer.
  cuda_neigh_list = NULL;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->shared_data.pair.use_block_per_atom = 0;
  cuda->setSystemParams();
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remember pointer to arrays in cuda shared data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairBuckCoulLongCuda::allocate()
|
||||
{
|
||||
if(! allocated) PairBuckCoulLong::allocate();
|
||||
if(! allocated2)
|
||||
{
|
||||
allocated2 = true;
|
||||
cuda->shared_data.pair.cut = cut_lj;
|
||||
cuda->shared_data.pair.coeff1 = rhoinv;
|
||||
cuda->shared_data.pair.coeff2 = buck1;
|
||||
cuda->shared_data.pair.coeff3 = buck2;
|
||||
cuda->shared_data.pair.coeff4 = a;
|
||||
cuda->shared_data.pair.coeff5 = c;
|
||||
cuda->shared_data.pair.offset = offset;
|
||||
cuda->shared_data.pair.special_lj = force->special_lj;
|
||||
cuda->shared_data.pair.special_coul = force->special_coul;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairBuckCoulLongCuda::compute(int eflag, int vflag)
|
||||
{
|
||||
MYDBG( printf("PairBuckCoulLongCuda compute start\n"); fflush(stdout);)
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
if(eflag) cuda->cu_eng_vdwl->upload();
|
||||
if(eflag) cuda->cu_eng_coul->upload();
|
||||
if(vflag) cuda->cu_virial->upload();
|
||||
|
||||
Cuda_PairBuckCoulLongCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
|
||||
|
||||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
{
|
||||
if(eflag) cuda->cu_eng_vdwl->download();
|
||||
if(eflag) cuda->cu_eng_coul->download();
|
||||
if(vflag) cuda->cu_virial->download();
|
||||
}
|
||||
MYDBG( printf("PairBuckCoulLongCuda compute end\n"); fflush(stdout);)
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairBuckCoulLongCuda::settings(int narg, char **arg)
|
||||
{
|
||||
PairBuckCoulLong::settings(narg, arg);
|
||||
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairBuckCoulLongCuda::coeff(int narg, char **arg)
|
||||
{
|
||||
PairBuckCoulLong::coeff(narg, arg);
|
||||
allocate();
|
||||
}
|
||||
|
||||
void PairBuckCoulLongCuda::init_style()
|
||||
{
|
||||
if (!atom->q_flag)
|
||||
error->all("Pair style buck/coul/long requires atom attribute q");
|
||||
// request regular or rRESPA neighbor lists
|
||||
|
||||
int irequest;
|
||||
|
||||
if (strcmp(update->integrate_style,"respa") == 0) error->all("Integrate Style Respa is not supported by pair style buck/coul/long/cuda");
|
||||
|
||||
irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->cudable = 1;
|
||||
|
||||
|
||||
cut_coulsq = cut_coul * cut_coul;
|
||||
cuda->shared_data.pair.cut_coulsq_global=cut_coulsq;
|
||||
|
||||
if (force->kspace == NULL)
|
||||
error->all("Pair style is incompatible with KSpace style");
|
||||
g_ewald = force->kspace->g_ewald;
|
||||
cuda->shared_data.pair.g_ewald=g_ewald;
|
||||
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
|
||||
|
||||
|
||||
if(ncoultablebits) error->warning("# CUDA: You asked for the useage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n");
|
||||
}
|
||||
|
||||
void PairBuckCoulLongCuda::init_list(int id, NeighList *ptr)
|
||||
{
|
||||
MYDBG(printf("# CUDA PairBuckCoulLongCuda::init_list\n");)
|
||||
PairBuckCoulLong::init_list(id, ptr);
|
||||
#ifndef CUDA_USE_BINNING
|
||||
// right now we can only handle verlet (id 0), not respa
|
||||
if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
|
||||
// see Neighbor::init() for details on lammps lists' logic
|
||||
#endif
|
||||
MYDBG(printf("# CUDA PairBuckCoulLongCuda::init_list end\n");)
|
||||
}
|
||||
|
||||
void PairBuckCoulLongCuda::ev_setup(int eflag, int vflag)
|
||||
{
|
||||
int maxeatomold=maxeatom;
|
||||
PairBuckCoulLong::ev_setup(eflag,vflag);
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
|
||||
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(buck/coul/long/cuda,PairBuckCoulLongCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_BUCK_COUL_LONG_CUDA_H
|
||||
#define LMP_PAIR_BUCK_COUL_LONG_CUDA_H
|
||||
|
||||
#include "pair_buck_coul_long.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairBuckCoulLongCuda : public PairBuckCoulLong
|
||||
{
|
||||
public:
|
||||
PairBuckCoulLongCuda(class LAMMPS *);
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_list(int, class NeighList *);
|
||||
void init_style();
|
||||
void ev_setup(int eflag, int vflag);
|
||||
protected:
|
||||
class Cuda *cuda;
|
||||
void allocate();
|
||||
bool allocated2;
|
||||
class CudaNeighList* cuda_neigh_list;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,169 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
Contributing author: Paul Crozier (SNL)
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "pair_buck_cuda.h"
|
||||
#include "pair_buck_cuda_cu.h"
|
||||
#include "cuda_data.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "kspace.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "cuda_neigh_list.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* Construct the CUDA variant of PairBuck.
   Takes the Cuda handle from the LAMMPS instance (error->all() if it is
   missing), flags the pair force as CUDA-capable in the shared data and
   calls cuda->setSystemParams(). */
PairBuckCuda::PairBuckCuda(LAMMPS *lmp) : PairBuck(lmp)
{
  cuda = lmp->cuda;
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  allocated2 = false;

  // Assigned later by init_list(); give it a defined value so compute()
  // never reads an indeterminate pointer.
  cuda_neigh_list = NULL;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->shared_data.pair.use_block_per_atom = 0;
  cuda->setSystemParams();
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remember pointer to arrays in cuda shared data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairBuckCuda::allocate()
|
||||
{
|
||||
if(! allocated) PairBuck::allocate();
|
||||
if(! allocated2)
|
||||
{
|
||||
allocated2 = true;
|
||||
cuda->shared_data.pair.cut = cut;
|
||||
cuda->shared_data.pair.coeff1 = rhoinv;
|
||||
cuda->shared_data.pair.coeff2 = buck1;
|
||||
cuda->shared_data.pair.coeff3 = buck2;
|
||||
cuda->shared_data.pair.coeff4 = a;
|
||||
cuda->shared_data.pair.coeff5 = c;
|
||||
cuda->shared_data.pair.offset = offset;
|
||||
cuda->shared_data.pair.special_lj = force->special_lj;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairBuckCuda::compute(int eflag, int vflag)
|
||||
{
|
||||
MYDBG( printf("PairBuckCuda compute start\n"); fflush(stdout);)
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
if(eflag) cuda->cu_eng_vdwl->upload();
|
||||
if(eflag) cuda->cu_eng_coul->upload();
|
||||
if(vflag) cuda->cu_virial->upload();
|
||||
|
||||
Cuda_PairBuckCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
|
||||
|
||||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
{
|
||||
if(eflag) cuda->cu_eng_vdwl->download();
|
||||
if(eflag) cuda->cu_eng_coul->download();
|
||||
if(vflag) cuda->cu_virial->download();
|
||||
}
|
||||
MYDBG( printf("PairBuckCuda compute end\n"); fflush(stdout);)
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairBuckCuda::settings(int narg, char **arg)
|
||||
{
|
||||
PairBuck::settings(narg, arg);
|
||||
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_global;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairBuckCuda::coeff(int narg, char **arg)
|
||||
{
|
||||
PairBuck::coeff(narg, arg);
|
||||
allocate();
|
||||
}
|
||||
|
||||
void PairBuckCuda::init_style()
|
||||
{
|
||||
if (!atom->q_flag)
|
||||
error->all("Pair style buck/coul/long requires atom attribute q");
|
||||
// request regular or rRESPA neighbor lists
|
||||
|
||||
int irequest;
|
||||
|
||||
if (strcmp(update->integrate_style,"respa") == 0) error->all("Integrate Style Respa is not supported by pair style buck/coul/long/cuda");
|
||||
|
||||
irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->cudable = 1;
|
||||
|
||||
|
||||
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
|
||||
|
||||
|
||||
if(ncoultablebits) error->warning("# CUDA: You asked for the useage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n");
|
||||
}
|
||||
|
||||
void PairBuckCuda::init_list(int id, NeighList *ptr)
|
||||
{
|
||||
MYDBG(printf("# CUDA PairBuckCuda::init_list\n");)
|
||||
PairBuck::init_list(id, ptr);
|
||||
#ifndef CUDA_USE_BINNING
|
||||
// right now we can only handle verlet (id 0), not respa
|
||||
if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
|
||||
// see Neighbor::init() for details on lammps lists' logic
|
||||
#endif
|
||||
MYDBG(printf("# CUDA PairBuckCuda::init_list end\n");)
|
||||
}
|
||||
|
||||
void PairBuckCuda::ev_setup(int eflag, int vflag)
|
||||
{
|
||||
int maxeatomold=maxeatom;
|
||||
PairBuck::ev_setup(eflag,vflag);
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
|
||||
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(buck/cuda,PairBuckCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_BUCK_CUDA_H
|
||||
#define LMP_PAIR_BUCK_CUDA_H
|
||||
|
||||
#include "pair_buck.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairBuckCuda : public PairBuck
|
||||
{
|
||||
public:
|
||||
PairBuckCuda(class LAMMPS *);
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_list(int, class NeighList *);
|
||||
void init_style();
|
||||
void ev_setup(int eflag, int vflag);
|
||||
protected:
|
||||
class Cuda *cuda;
|
||||
void allocate();
|
||||
bool allocated2;
|
||||
class CudaNeighList* cuda_neigh_list;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,204 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Paul Crozier (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "pair_cg_cmm_coul_cut_cuda.h"
|
||||
#include "pair_cg_cmm_coul_cut_cuda_cu.h"
|
||||
#include "cuda_data.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "cuda_neigh_list.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
   construct the CUDA variant of cg/cmm/coul/cut
   aborts via error->all() when the LAMMPS instance has no Cuda object
------------------------------------------------------------------------- */

PairCGCMMCoulCutCuda::PairCGCMMCoulCutCuda(LAMMPS *lmp) : PairCGCMMCoulCut(lmp)
{
  cuda = lmp->cuda;
  if(cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  allocated2 = false;
  cg_type_double = NULL;

  // give the list pointer a defined value: init_list() only assigns it for
  // list id 0 (and never when CUDA_USE_BINNING is defined), while compute()
  // dereferences it
  cuda_neigh_list = NULL;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remember pointer to arrays in cuda shared data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMCoulCutCuda::allocate()
|
||||
{
|
||||
if(! allocated) PairCGCMMCoulCut::allocate();
|
||||
int n = atom->ntypes;
|
||||
if(! allocated2)
|
||||
{
|
||||
allocated2 = true;
|
||||
|
||||
|
||||
memory->create(cg_type_double,n+1,n+1,"paircg:cgtypedouble");
|
||||
|
||||
cuda->shared_data.pair.cut = cut_lj;
|
||||
cuda->shared_data.pair.cut_coul= cut_coul;
|
||||
cuda->shared_data.pair.coeff1 = lj1;
|
||||
cuda->shared_data.pair.coeff2 = lj2;
|
||||
cuda->shared_data.pair.coeff3 = lj3;
|
||||
cuda->shared_data.pair.coeff4 = lj4;
|
||||
cuda->shared_data.pair.coeff5 = cg_type_double;
|
||||
cuda->shared_data.pair.offset = offset;
|
||||
cuda->shared_data.pair.special_lj = force->special_lj;
|
||||
cuda->shared_data.pair.special_coul = force->special_coul;
|
||||
}
|
||||
for (int i = 1; i <= n; i++) {
|
||||
for (int j = i; j <= n; j++) {
|
||||
cg_type_double[i][j] = cg_type[i][j];
|
||||
cg_type_double[j][i] = cg_type[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMCoulCutCuda::compute(int eflag, int vflag)
|
||||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
if(eflag) cuda->cu_eng_vdwl->upload();
|
||||
if(eflag) cuda->cu_eng_coul->upload();
|
||||
if(vflag) cuda->cu_virial->upload();
|
||||
|
||||
Cuda_PairCGCMMCoulCutCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
|
||||
|
||||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
{
|
||||
if(eflag) cuda->cu_eng_vdwl->download();
|
||||
if(eflag) cuda->cu_eng_coul->download();
|
||||
if(vflag) cuda->cu_virial->download();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMCoulCutCuda::settings(int narg, char **arg)
|
||||
{
|
||||
PairCGCMMCoulCut::settings(narg, arg);
|
||||
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
|
||||
cuda->shared_data.pair.cut_coul_global = (F_FLOAT) cut_coul_global;
|
||||
cuda->shared_data.pair.kappa = (F_FLOAT) kappa;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMCoulCutCuda::coeff(int narg, char **arg)
|
||||
{
|
||||
PairCGCMMCoulCut::coeff(narg, arg);
|
||||
allocate();
|
||||
}
|
||||
|
||||
void PairCGCMMCoulCutCuda::init_style()
|
||||
{
|
||||
MYDBG(printf("# CUDA PairCGCMMCoulCutCuda::init_style start\n"); )
|
||||
// request regular or rRESPA neighbor lists
|
||||
|
||||
int irequest;
|
||||
|
||||
if (update->whichflag == 0 && strcmp(update->integrate_style,"respa") == 0) {
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->cudable = 1;
|
||||
//neighbor->style=0; //0=NSQ neighboring
|
||||
}
|
||||
|
||||
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
|
||||
cut_respa=NULL;
|
||||
if (force->newton) error->warning("Pair style uses does not use \"newton\" setting. You might test if \"newton off\" makes the simulation run faster.");
|
||||
|
||||
MYDBG(printf("# CUDA PairCGCMMCoulCutCuda::init_style end\n"); )
|
||||
}
|
||||
|
||||
void PairCGCMMCoulCutCuda::init_list(int id, NeighList *ptr)
|
||||
{
|
||||
MYDBG(printf("# CUDA PairCGCMMCoulCutCuda::init_list\n");)
|
||||
PairCGCMMCoulCut::init_list(id, ptr);
|
||||
#ifndef CUDA_USE_BINNING
|
||||
// right now we can only handle verlet (id 0), not respa
|
||||
if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
|
||||
// see Neighbor::init() for details on lammps lists' logic
|
||||
#endif
|
||||
MYDBG(printf("# CUDA PairCGCMMCoulCutCuda::init_list end\n");)
|
||||
}
|
||||
|
||||
void PairCGCMMCoulCutCuda::ev_setup(int eflag, int vflag)
|
||||
{
|
||||
int maxeatomold=maxeatom;
|
||||
PairCGCMMCoulCut::ev_setup(eflag,vflag);
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(cg/cmm/coul/cut/cuda,PairCGCMMCoulCutCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef PAIR_CG_CMM_COUL_CUT_CUDA_H
|
||||
#define PAIR_CG_CMM_COUL_CUT_CUDA_H
|
||||
|
||||
#include "pair_cg_cmm_coul_cut.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairCGCMMCoulCutCuda : public PairCGCMMCoulCut
|
||||
{
|
||||
public:
|
||||
PairCGCMMCoulCutCuda(class LAMMPS *);
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_list(int, class NeighList *);
|
||||
void init_style();
|
||||
void ev_setup(int eflag, int vflag);
|
||||
protected:
|
||||
class Cuda *cuda;
|
||||
void allocate();
|
||||
bool allocated2;
|
||||
class CudaNeighList* cuda_neigh_list;
|
||||
double** cg_type_double;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,204 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Paul Crozier (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "pair_cg_cmm_coul_debye_cuda.h"
|
||||
#include "pair_cg_cmm_coul_debye_cuda_cu.h"
|
||||
#include "cuda_data.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "cuda_neigh_list.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
   construct the CUDA variant of cg/cmm/coul/debye
   aborts via error->all() when the LAMMPS instance has no Cuda object
------------------------------------------------------------------------- */

PairCGCMMCoulDebyeCuda::PairCGCMMCoulDebyeCuda(LAMMPS *lmp) : PairCGCMMCoulCut(lmp)
{
  cuda = lmp->cuda;
  if(cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  allocated2 = false;
  cg_type_double = NULL;

  // give the list pointer a defined value: init_list() only assigns it for
  // list id 0 (and never when CUDA_USE_BINNING is defined), while compute()
  // dereferences it
  cuda_neigh_list = NULL;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remember pointer to arrays in cuda shared data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMCoulDebyeCuda::allocate()
|
||||
{
|
||||
if(! allocated) PairCGCMMCoulCut::allocate();
|
||||
int n = atom->ntypes;
|
||||
if(! allocated2)
|
||||
{
|
||||
allocated2 = true;
|
||||
|
||||
|
||||
memory->create(cg_type_double,n+1,n+1,"paircg:cgtypedouble");
|
||||
|
||||
cuda->shared_data.pair.cut = cut_lj;
|
||||
cuda->shared_data.pair.cut_coul= cut_coul;
|
||||
cuda->shared_data.pair.coeff1 = lj1;
|
||||
cuda->shared_data.pair.coeff2 = lj2;
|
||||
cuda->shared_data.pair.coeff3 = lj3;
|
||||
cuda->shared_data.pair.coeff4 = lj4;
|
||||
cuda->shared_data.pair.coeff5 = cg_type_double;
|
||||
cuda->shared_data.pair.offset = offset;
|
||||
cuda->shared_data.pair.special_lj = force->special_lj;
|
||||
cuda->shared_data.pair.special_coul = force->special_coul;
|
||||
}
|
||||
for (int i = 1; i <= n; i++) {
|
||||
for (int j = i; j <= n; j++) {
|
||||
cg_type_double[i][j] = cg_type[i][j];
|
||||
cg_type_double[j][i] = cg_type[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMCoulDebyeCuda::compute(int eflag, int vflag)
|
||||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
if(eflag) cuda->cu_eng_vdwl->upload();
|
||||
if(eflag) cuda->cu_eng_coul->upload();
|
||||
if(vflag) cuda->cu_virial->upload();
|
||||
|
||||
Cuda_PairCGCMMCoulDebyeCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
|
||||
|
||||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
{
|
||||
if(eflag) cuda->cu_eng_vdwl->download();
|
||||
if(eflag) cuda->cu_eng_coul->download();
|
||||
if(vflag) cuda->cu_virial->download();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMCoulDebyeCuda::settings(int narg, char **arg)
|
||||
{
|
||||
PairCGCMMCoulCut::settings(narg, arg);
|
||||
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
|
||||
cuda->shared_data.pair.cut_coul_global = (F_FLOAT) cut_coul_global;
|
||||
cuda->shared_data.pair.kappa = (F_FLOAT) kappa;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMCoulDebyeCuda::coeff(int narg, char **arg)
|
||||
{
|
||||
PairCGCMMCoulCut::coeff(narg, arg);
|
||||
allocate();
|
||||
}
|
||||
|
||||
void PairCGCMMCoulDebyeCuda::init_style()
|
||||
{
|
||||
MYDBG(printf("# CUDA PairCGCMMCoulDebyeCuda::init_style start\n"); )
|
||||
// request regular or rRESPA neighbor lists
|
||||
|
||||
int irequest;
|
||||
|
||||
if (update->whichflag == 0 && strcmp(update->integrate_style,"respa") == 0) {
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->cudable = 1;
|
||||
//neighbor->style=0; //0=NSQ neighboring
|
||||
}
|
||||
|
||||
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
|
||||
cut_respa=NULL;
|
||||
if (force->newton) error->warning("Pair style uses does not use \"newton\" setting. You might test if \"newton off\" makes the simulation run faster.");
|
||||
|
||||
MYDBG(printf("# CUDA PairCGCMMCoulDebyeCuda::init_style end\n"); )
|
||||
}
|
||||
|
||||
void PairCGCMMCoulDebyeCuda::init_list(int id, NeighList *ptr)
|
||||
{
|
||||
MYDBG(printf("# CUDA PairCGCMMCoulDebyeCuda::init_list\n");)
|
||||
PairCGCMMCoulCut::init_list(id, ptr);
|
||||
#ifndef CUDA_USE_BINNING
|
||||
// right now we can only handle verlet (id 0), not respa
|
||||
if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
|
||||
// see Neighbor::init() for details on lammps lists' logic
|
||||
#endif
|
||||
MYDBG(printf("# CUDA PairCGCMMCoulDebyeCuda::init_list end\n");)
|
||||
}
|
||||
|
||||
void PairCGCMMCoulDebyeCuda::ev_setup(int eflag, int vflag)
|
||||
{
|
||||
int maxeatomold=maxeatom;
|
||||
PairCGCMMCoulCut::ev_setup(eflag,vflag);
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(cg/cmm/coul/debye/cuda,PairCGCMMCoulDebyeCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef PAIR_CG_CMM_COUL_DEBYE_CUDA_H
|
||||
#define PAIR_CG_CMM_COUL_DEBYE_CUDA_H
|
||||
|
||||
#include "pair_cg_cmm_coul_cut.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairCGCMMCoulDebyeCuda : public PairCGCMMCoulCut
|
||||
{
|
||||
public:
|
||||
PairCGCMMCoulDebyeCuda(class LAMMPS *);
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_list(int, class NeighList *);
|
||||
void init_style();
|
||||
void ev_setup(int eflag, int vflag);
|
||||
protected:
|
||||
class Cuda *cuda;
|
||||
void allocate();
|
||||
bool allocated2;
|
||||
class CudaNeighList* cuda_neigh_list;
|
||||
double** cg_type_double;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,206 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Paul Crozier (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "pair_cg_cmm_coul_long_cuda.h"
|
||||
#include "pair_cg_cmm_coul_long_cuda_cu.h"
|
||||
#include "cuda_data.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "kspace.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "cuda_neigh_list.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
   construct the CUDA variant of cg/cmm/coul/long
   aborts via error->all() when the LAMMPS instance has no Cuda object
------------------------------------------------------------------------- */

PairCGCMMCoulLongCuda::PairCGCMMCoulLongCuda(LAMMPS *lmp) : PairCGCMMCoulLong(lmp)
{
  cuda = lmp->cuda;
  if(cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  allocated2 = false;
  cg_type_double = NULL;

  // give the list pointer a defined value: init_list() only assigns it for
  // list id 0 (and never when CUDA_USE_BINNING is defined), while compute()
  // dereferences it
  cuda_neigh_list = NULL;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remember pointer to arrays in cuda shared data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMCoulLongCuda::allocate()
|
||||
{
|
||||
if(! allocated) PairCGCMMCoulLong::allocate();
|
||||
int n = atom->ntypes;
|
||||
if(! allocated2)
|
||||
{
|
||||
allocated2 = true;
|
||||
|
||||
|
||||
memory->create(cg_type_double,n+1,n+1,"paircg:cgtypedouble");
|
||||
|
||||
cuda->shared_data.pair.cut = cut_lj;
|
||||
cuda->shared_data.pair.cut_coul= cut_coul;
|
||||
cuda->shared_data.pair.coeff1 = lj1;
|
||||
cuda->shared_data.pair.coeff2 = lj2;
|
||||
cuda->shared_data.pair.coeff3 = lj3;
|
||||
cuda->shared_data.pair.coeff4 = lj4;
|
||||
cuda->shared_data.pair.coeff5 = cg_type_double;
|
||||
cuda->shared_data.pair.offset = offset;
|
||||
cuda->shared_data.pair.special_lj = force->special_lj;
|
||||
cuda->shared_data.pair.special_coul = force->special_coul;
|
||||
}
|
||||
for (int i = 1; i <= n; i++) {
|
||||
for (int j = i; j <= n; j++) {
|
||||
cg_type_double[i][j] = cg_type[i][j];
|
||||
cg_type_double[j][i] = cg_type[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMCoulLongCuda::compute(int eflag, int vflag)
|
||||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
if(eflag) cuda->cu_eng_vdwl->upload();
|
||||
if(eflag) cuda->cu_eng_coul->upload();
|
||||
if(vflag) cuda->cu_virial->upload();
|
||||
|
||||
Cuda_PairCGCMMCoulLongCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
|
||||
|
||||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
{
|
||||
if(eflag) cuda->cu_eng_vdwl->download();
|
||||
if(eflag) cuda->cu_eng_coul->download();
|
||||
if(vflag) cuda->cu_virial->download();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMCoulLongCuda::settings(int narg, char **arg)
|
||||
{
|
||||
PairCGCMMCoulLong::settings(narg, arg);
|
||||
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
|
||||
cuda->shared_data.pair.cut_coul_global = (F_FLOAT) cut_coul_global;
|
||||
cuda->shared_data.pair.kappa = (F_FLOAT) kappa;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMCoulLongCuda::coeff(int narg, char **arg)
|
||||
{
|
||||
PairCGCMMCoulLong::coeff(narg, arg);
|
||||
allocate();
|
||||
}
|
||||
|
||||
void PairCGCMMCoulLongCuda::init_style()
|
||||
{
|
||||
MYDBG(printf("# CUDA PairCGCMMCoulLongCuda::init_style start\n"); )
|
||||
// request regular or rRESPA neighbor lists
|
||||
|
||||
int irequest;
|
||||
|
||||
if (update->whichflag == 0 && strcmp(update->integrate_style,"respa") == 0) {
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->cudable = 1;
|
||||
//neighbor->style=0; //0=NSQ neighboring
|
||||
}
|
||||
|
||||
g_ewald = force->kspace->g_ewald;
|
||||
cuda->shared_data.pair.g_ewald=g_ewald;
|
||||
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
|
||||
cut_respa=NULL;
|
||||
if (force->newton) error->warning("Pair style uses does not use \"newton\" setting. You might test if \"newton off\" makes the simulation run faster.");
|
||||
MYDBG(printf("# CUDA PairCGCMMCoulLongCuda::init_style end\n"); )
|
||||
}
|
||||
|
||||
void PairCGCMMCoulLongCuda::init_list(int id, NeighList *ptr)
|
||||
{
|
||||
MYDBG(printf("# CUDA PairCGCMMCoulLongCuda::init_list\n");)
|
||||
PairCGCMMCoulLong::init_list(id, ptr);
|
||||
#ifndef CUDA_USE_BINNING
|
||||
// right now we can only handle verlet (id 0), not respa
|
||||
if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
|
||||
// see Neighbor::init() for details on lammps lists' logic
|
||||
#endif
|
||||
MYDBG(printf("# CUDA PairCGCMMCoulLongCuda::init_list end\n");)
|
||||
}
|
||||
|
||||
void PairCGCMMCoulLongCuda::ev_setup(int eflag, int vflag)
|
||||
{
|
||||
int maxeatomold=maxeatom;
|
||||
PairCGCMMCoulLong::ev_setup(eflag,vflag);
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(cg/cmm/coul/long/cuda,PairCGCMMCoulLongCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef PAIR_CG_CMM_COUL_LONG_CUDA_H
|
||||
#define PAIR_CG_CMM_COUL_LONG_CUDA_H
|
||||
|
||||
#include "pair_cg_cmm_coul_long.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairCGCMMCoulLongCuda : public PairCGCMMCoulLong
|
||||
{
|
||||
public:
|
||||
PairCGCMMCoulLongCuda(class LAMMPS *);
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_list(int, class NeighList *);
|
||||
void init_style();
|
||||
void ev_setup(int eflag, int vflag);
|
||||
protected:
|
||||
class Cuda *cuda;
|
||||
void allocate();
|
||||
bool allocated2;
|
||||
class CudaNeighList* cuda_neigh_list;
|
||||
double** cg_type_double;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,201 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Paul Crozier (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "pair_cg_cmm_cuda.h"
|
||||
#include "pair_cg_cmm_cuda_cu.h"
|
||||
#include "cuda_data.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "cuda_neigh_list.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Construct the CUDA flavour of the cg/cmm pair style.
// Requires the accelerator object of the LAMMPS instance (lmp->cuda);
// reports an error through error->all() when it is missing.
PairCGCMMCuda::PairCGCMMCuda(LAMMPS *lmp) : PairCGCMM(lmp)
{
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  // nothing CUDA specific has been set up yet; allocate() does that lazily
  cg_type_double = NULL;
  allocated2 = false;

  // flag this pair style as GPU capable, then let the Cuda object refresh
  // its system parameters
  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remember pointer to arrays in cuda shared data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMCuda::allocate()
|
||||
{
|
||||
if(! allocated) PairCGCMM::allocate();
|
||||
int n = atom->ntypes;
|
||||
if(! allocated2)
|
||||
{
|
||||
allocated2 = true;
|
||||
|
||||
|
||||
memory->create(cg_type_double,n+1,n+1,"paircg:cgtypedouble");
|
||||
|
||||
cuda->shared_data.pair.cut = cut;
|
||||
cuda->shared_data.pair.coeff1 = lj1;
|
||||
cuda->shared_data.pair.coeff2 = lj2;
|
||||
cuda->shared_data.pair.coeff3 = lj3;
|
||||
cuda->shared_data.pair.coeff4 = lj4;
|
||||
cuda->shared_data.pair.coeff5 = cg_type_double;
|
||||
/*cu_lj1_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm, (atom->ntypes+1)*(atom->ntypes+1));
|
||||
cu_lj2_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm, (atom->ntypes+1)*(atom->ntypes+1));
|
||||
cu_lj3_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm, (atom->ntypes+1)*(atom->ntypes+1));
|
||||
cu_lj4_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm, (atom->ntypes+1)*(atom->ntypes+1));
|
||||
cu_cg_type_double_gm = new cCudaData<double, F_FLOAT, x> ((double*)cg_type_double, &cuda->shared_data.pair.coeff5_gm, (atom->ntypes+1)*(atom->ntypes+1));*/
|
||||
cuda->shared_data.pair.offset = offset;
|
||||
cuda->shared_data.pair.special_lj = force->special_lj;
|
||||
}
|
||||
for (int i = 1; i <= n; i++) {
|
||||
for (int j = i; j <= n; j++) {
|
||||
cg_type_double[i][j] = cg_type[i][j];
|
||||
cg_type_double[j][i] = cg_type[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMCuda::compute(int eflag, int vflag)
|
||||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
if(eflag) cuda->cu_eng_vdwl->upload();
|
||||
if(vflag) cuda->cu_virial->upload();
|
||||
|
||||
Cuda_PairCGCMMCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
|
||||
|
||||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
{
|
||||
if(eflag) cuda->cu_eng_vdwl->download();
|
||||
if(vflag) cuda->cu_virial->download();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMCuda::settings(int narg, char **arg)
|
||||
{
|
||||
PairCGCMM::settings(narg, arg);
|
||||
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMCuda::coeff(int narg, char **arg)
|
||||
{
|
||||
PairCGCMM::coeff(narg, arg);
|
||||
allocate();
|
||||
}
|
||||
|
||||
void PairCGCMMCuda::init_style()
|
||||
{
|
||||
MYDBG(printf("# CUDA PairCGCMMCuda::init_style start\n"); )
|
||||
// request regular or rRESPA neighbor lists
|
||||
|
||||
int irequest;
|
||||
|
||||
if (update->whichflag == 0 && strcmp(update->integrate_style,"respa") == 0) {
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->cudable = 1;
|
||||
//neighbor->style=0; //0=NSQ neighboring
|
||||
}
|
||||
|
||||
cut_respa=NULL;
|
||||
|
||||
MYDBG(printf("# CUDA PairCGCMMCuda::init_style end\n"); )
|
||||
}
|
||||
|
||||
void PairCGCMMCuda::init_list(int id, NeighList *ptr)
|
||||
{
|
||||
MYDBG(printf("# CUDA PairCGCMMCuda::init_list\n");)
|
||||
PairCGCMM::init_list(id, ptr);
|
||||
#ifndef CUDA_USE_BINNING
|
||||
// right now we can only handle verlet (id 0), not respa
|
||||
if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
|
||||
// see Neighbor::init() for details on lammps lists' logic
|
||||
#endif
|
||||
MYDBG(printf("# CUDA PairCGCMMCuda::init_list end\n");)
|
||||
}
|
||||
|
||||
void PairCGCMMCuda::ev_setup(int eflag, int vflag)
|
||||
{
|
||||
int maxeatomold=maxeatom;
|
||||
PairCGCMM::ev_setup(eflag,vflag);
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(cg/cmm/cuda,PairCGCMMCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef PAIR_CG_CMM_CUDA_H
|
||||
#define PAIR_CG_CMM_CUDA_H
|
||||
|
||||
#include "pair_cg_cmm.h"
|
||||
#include "cuda_data.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairCGCMMCuda : public PairCGCMM
|
||||
{
|
||||
public:
|
||||
PairCGCMMCuda(class LAMMPS *);
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_list(int, class NeighList *);
|
||||
void init_style();
|
||||
void ev_setup(int eflag, int vflag);
|
||||
protected:
|
||||
class Cuda *cuda;
|
||||
void allocate();
|
||||
bool allocated2;
|
||||
class CudaNeighList* cuda_neigh_list;
|
||||
double** cg_type_double;
|
||||
cCudaData<double , F_FLOAT , x >* cu_lj1_gm;
|
||||
cCudaData<double , F_FLOAT , x >* cu_lj2_gm;
|
||||
cCudaData<double , F_FLOAT , x >* cu_lj3_gm;
|
||||
cCudaData<double , F_FLOAT , x >* cu_lj4_gm;
|
||||
cCudaData<double , F_FLOAT , x >* cu_cg_type_double_gm;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,326 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Stephen Foiles (SNL), Murray Daw (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "pair_eam_alloy_cuda.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MAXLINE 1024
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Construct the eam/alloy/cuda pair style on top of PairEAMCuda.
// Reports an error through error->all() when lmp->cuda is not available.
PairEAMAlloyCuda::PairEAMAlloyCuda(LAMMPS *lmp) : PairEAMCuda(lmp)
{
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  // a single pair_coeff command (I,J = * *) defines all coefficients
  one_coeff = 1;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
set coeffs for one or more type pairs
|
||||
read DYNAMO setfl file
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairEAMAlloyCuda::coeff(int narg, char **arg)
|
||||
{
|
||||
int i,j;
|
||||
|
||||
if (!allocated) allocate();
|
||||
|
||||
if (narg != 3 + atom->ntypes)
|
||||
error->all("Incorrect args for pair coefficients");
|
||||
|
||||
// insure I,J args are * *
|
||||
|
||||
if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
|
||||
error->all("Incorrect args for pair coefficients");
|
||||
|
||||
// read EAM setfl file
|
||||
|
||||
if (setfl) {
|
||||
for (i = 0; i < setfl->nelements; i++) delete [] setfl->elements[i];
|
||||
delete [] setfl->elements;
|
||||
delete [] setfl->mass;
|
||||
memory->destroy(setfl->frho);
|
||||
memory->destroy(setfl->rhor);
|
||||
memory->destroy(setfl->z2r);
|
||||
delete setfl;
|
||||
}
|
||||
setfl = new Setfl();
|
||||
read_file(arg[2]);
|
||||
|
||||
// read args that map atom types to elements in potential file
|
||||
// map[i] = which element the Ith atom type is, -1 if NULL
|
||||
|
||||
for (i = 3; i < narg; i++) {
|
||||
if (strcmp(arg[i],"NULL") == 0) {
|
||||
map[i-2] = -1;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < setfl->nelements; j++)
|
||||
if (strcmp(arg[i],setfl->elements[j]) == 0) break;
|
||||
if (j < setfl->nelements) map[i-2] = j;
|
||||
else error->all("No matching element in EAM potential file");
|
||||
}
|
||||
|
||||
// clear setflag since coeff() called once with I,J = * *
|
||||
|
||||
int n = atom->ntypes;
|
||||
for (i = 1; i <= n; i++)
|
||||
for (j = i; j <= n; j++)
|
||||
setflag[i][j] = 0;
|
||||
|
||||
// set setflag i,j for type pairs where both are mapped to elements
|
||||
// set mass of atom type if i = j
|
||||
|
||||
int count = 0;
|
||||
for (i = 1; i <= n; i++) {
|
||||
for (j = i; j <= n; j++) {
|
||||
if (map[i] >= 0 && map[j] >= 0) {
|
||||
setflag[i][j] = 1;
|
||||
if (i == j) atom->set_mass(i,setfl->mass[map[i]]);
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (count == 0) error->all("Incorrect args for pair coefficients");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
read a multi-element DYNAMO setfl file
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairEAMAlloyCuda::read_file(char *filename)
|
||||
{
|
||||
Setfl *file = setfl;
|
||||
|
||||
// open potential file
|
||||
|
||||
int me = comm->me;
|
||||
FILE *fptr;
|
||||
char line[MAXLINE];
|
||||
|
||||
if (me == 0) {
|
||||
fptr = fopen(filename,"r");
|
||||
if (fptr == NULL) {
|
||||
char str[128];
|
||||
sprintf(str,"Cannot open EAM potential file %s",filename);
|
||||
error->one(str);
|
||||
}
|
||||
}
|
||||
|
||||
// read and broadcast header
|
||||
// extract element names from nelements line
|
||||
|
||||
int n;
|
||||
if (me == 0) {
|
||||
fgets(line,MAXLINE,fptr);
|
||||
fgets(line,MAXLINE,fptr);
|
||||
fgets(line,MAXLINE,fptr);
|
||||
fgets(line,MAXLINE,fptr);
|
||||
n = strlen(line) + 1;
|
||||
}
|
||||
MPI_Bcast(&n,1,MPI_INT,0,world);
|
||||
MPI_Bcast(line,n,MPI_CHAR,0,world);
|
||||
|
||||
sscanf(line,"%d",&file->nelements);
|
||||
int nwords = atom->count_words(line);
|
||||
if (nwords != file->nelements + 1)
|
||||
error->all("Incorrect element names in EAM potential file");
|
||||
|
||||
char **words = new char*[file->nelements+1];
|
||||
nwords = 0;
|
||||
char *first = strtok(line," \t\n\r\f");
|
||||
while (words[nwords++] = strtok(NULL," \t\n\r\f")) continue;
|
||||
|
||||
file->elements = new char*[file->nelements];
|
||||
for (int i = 0; i < file->nelements; i++) {
|
||||
n = strlen(words[i]) + 1;
|
||||
file->elements[i] = new char[n];
|
||||
strcpy(file->elements[i],words[i]);
|
||||
}
|
||||
delete [] words;
|
||||
|
||||
if (me == 0) {
|
||||
fgets(line,MAXLINE,fptr);
|
||||
sscanf(line,"%d %lg %d %lg %lg",
|
||||
&file->nrho,&file->drho,&file->nr,&file->dr,&file->cut);
|
||||
}
|
||||
|
||||
MPI_Bcast(&file->nrho,1,MPI_INT,0,world);
|
||||
MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&file->nr,1,MPI_INT,0,world);
|
||||
MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world);
|
||||
|
||||
file->mass = new double[file->nelements];
|
||||
memory->create(file->frho,file->nelements,file->nrho+1,"pair:frho");
|
||||
memory->create(file->rhor,file->nelements,file->nr+1,"pair:rhor");
|
||||
memory->create(file->z2r,file->nelements,file->nelements,file->nr+1,
|
||||
"pair:z2r");
|
||||
int i,j,tmp;
|
||||
for (i = 0; i < file->nelements; i++) {
|
||||
if (me == 0) {
|
||||
fgets(line,MAXLINE,fptr);
|
||||
sscanf(line,"%d %lg",&tmp,&file->mass[i]);
|
||||
}
|
||||
MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world);
|
||||
|
||||
if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]);
|
||||
MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world);
|
||||
if (me == 0) grab(fptr,file->nr,&file->rhor[i][1]);
|
||||
MPI_Bcast(&file->rhor[i][1],file->nr,MPI_DOUBLE,0,world);
|
||||
}
|
||||
|
||||
for (i = 0; i < file->nelements; i++)
|
||||
for (j = 0; j <= i; j++) {
|
||||
if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]);
|
||||
MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world);
|
||||
}
|
||||
|
||||
// close the potential file
|
||||
|
||||
if (me == 0) fclose(fptr);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
copy read-in setfl potential to standard array format
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairEAMAlloyCuda::file2array()
|
||||
{
|
||||
int i,j,m,n;
|
||||
int ntypes = atom->ntypes;
|
||||
|
||||
// set function params directly from setfl file
|
||||
|
||||
nrho = setfl->nrho;
|
||||
nr = setfl->nr;
|
||||
drho = setfl->drho;
|
||||
dr = setfl->dr;
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// setup frho arrays
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
// allocate frho arrays
|
||||
// nfrho = # of setfl elements + 1 for zero array
|
||||
|
||||
nfrho = setfl->nelements + 1;
|
||||
memory->destroy(frho);
|
||||
memory->create(frho,nfrho,nrho+1,"pair:frho");
|
||||
|
||||
// copy each element's frho to global frho
|
||||
|
||||
for (i = 0; i < setfl->nelements; i++)
|
||||
for (m = 1; m <= nrho; m++) frho[i][m] = setfl->frho[i][m];
|
||||
|
||||
// add extra frho of zeroes for non-EAM types to point to (pair hybrid)
|
||||
// this is necessary b/c fp is still computed for non-EAM atoms
|
||||
|
||||
for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0;
|
||||
|
||||
// type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
|
||||
// if atom type doesn't point to element (non-EAM atom in pair hybrid)
|
||||
// then map it to last frho array of zeroes
|
||||
|
||||
for (i = 1; i <= ntypes; i++)
|
||||
if (map[i] >= 0) type2frho[i] = map[i];
|
||||
else type2frho[i] = nfrho-1;
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// setup rhor arrays
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
// allocate rhor arrays
|
||||
// nrhor = # of setfl elements
|
||||
|
||||
nrhor = setfl->nelements;
|
||||
memory->destroy(rhor);
|
||||
memory->create(rhor,nrhor,nr+1,"pair:rhor");
|
||||
|
||||
// copy each element's rhor to global rhor
|
||||
|
||||
for (i = 0; i < setfl->nelements; i++)
|
||||
for (m = 1; m <= nr; m++) rhor[i][m] = setfl->rhor[i][m];
|
||||
|
||||
// type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to
|
||||
// for setfl files, I,J mapping only depends on I
|
||||
// OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
|
||||
|
||||
for (i = 1; i <= ntypes; i++)
|
||||
for (j = 1; j <= ntypes; j++)
|
||||
type2rhor[i][j] = map[i];
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// setup z2r arrays
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
// allocate z2r arrays
|
||||
// nz2r = N*(N+1)/2 where N = # of setfl elements
|
||||
|
||||
nz2r = setfl->nelements * (setfl->nelements+1) / 2;
|
||||
memory->destroy(z2r);
|
||||
memory->create(z2r,nz2r,nr+1,"pair:z2r");
|
||||
|
||||
// copy each element pair z2r to global z2r, only for I >= J
|
||||
|
||||
n = 0;
|
||||
for (i = 0; i < setfl->nelements; i++)
|
||||
for (j = 0; j <= i; j++) {
|
||||
for (m = 1; m <= nr; m++) z2r[n][m] = setfl->z2r[i][j][m];
|
||||
n++;
|
||||
}
|
||||
|
||||
// type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to
|
||||
// set of z2r arrays only fill lower triangular Nelement matrix
|
||||
// value = n = sum over rows of lower-triangular matrix until reach irow,icol
|
||||
// swap indices when irow < icol to stay lower triangular
|
||||
// if map = -1 (non-EAM atom in pair hybrid):
|
||||
// type2z2r is not used by non-opt
|
||||
// but set type2z2r to 0 since accessed by opt
|
||||
|
||||
int irow,icol;
|
||||
for (i = 1; i <= ntypes; i++) {
|
||||
for (j = 1; j <= ntypes; j++) {
|
||||
irow = map[i];
|
||||
icol = map[j];
|
||||
if (irow == -1 || icol == -1) {
|
||||
type2z2r[i][j] = 0;
|
||||
continue;
|
||||
}
|
||||
if (irow < icol) {
|
||||
irow = map[j];
|
||||
icol = map[i];
|
||||
}
|
||||
n = 0;
|
||||
for (m = 0; m < irow; m++) n += m + 1;
|
||||
n += icol;
|
||||
type2z2r[i][j] = n;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(eam/alloy/cuda,PairEAMAlloyCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_EAM_CUDA_ALLOY_H
|
||||
#define LMP_PAIR_EAM_CUDA_ALLOY_H
|
||||
|
||||
#include "pair_eam_cuda.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// use virtual public since this class is parent in multiple inheritance
|
||||
|
||||
// Pair style eam/alloy/cuda: PairEAMCuda fed from a multi-element
// DYNAMO setfl potential file.
class PairEAMAlloyCuda : virtual public PairEAMCuda {
 public:
  PairEAMAlloyCuda(class LAMMPS *);
  virtual ~PairEAMAlloyCuda() {}

  void coeff(int, char **);     // handles the single "pair_coeff * * ..." command

 protected:
  class Cuda *cuda;             // taken from lmp->cuda in the constructor

  void read_file(char *);       // parse the setfl file into setfl
  void file2array();            // copy setfl tables into frho / rhor / z2r
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,239 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Paul Crozier (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "pair_eam_cuda.h"
|
||||
#include "pair_eam_cuda_cu.h"
|
||||
#include "pair_virial_compute_cu.h"
|
||||
#include "cuda_data.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "cuda_neigh_list.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Construct the eam/cuda pair style.
// Requires the accelerator object of the LAMMPS instance (lmp->cuda);
// reports an error through error->all() when it is missing.
PairEAMCuda::PairEAMCuda(LAMMPS *lmp) : PairEAM(lmp)
{
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  // device mirrors are created later, in compute() and init_style()
  cu_rho = NULL;
  cu_fp = NULL;
  cu_frho_spline = NULL;
  cu_z2r_spline = NULL;
  cu_rhor_spline = NULL;

  allocated2 = false;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->shared_data.pair.override_block_per_atom = 0;
  cuda->setSystemParams();
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remember pointer to arrays in cuda shared data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairEAMCuda::allocate()
|
||||
{
|
||||
if(! allocated) PairEAM::allocate();
|
||||
cuda->shared_data.pair.cutsq = cutsq;
|
||||
cuda->shared_data.pair.cut_global = (F_FLOAT) cutforcesq;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairEAMCuda::compute(int eflag, int vflag)
|
||||
{
|
||||
cuda->shared_data.pair.cut_global = (F_FLOAT) cutforcesq;
|
||||
cuda->shared_data.pair.use_block_per_atom = 0;
|
||||
cuda->shared_data.pair.collect_forces_later = 0;
|
||||
if (atom->nmax > nmax) {
|
||||
memory->destroy(rho);
|
||||
memory->destroy(fp);
|
||||
nmax = atom->nmax;
|
||||
memory->create(rho,nmax,"pair:rho");
|
||||
memory->create(fp,nmax,"pair:fp");
|
||||
delete cu_rho;
|
||||
delete cu_fp;
|
||||
cu_rho = new cCudaData<double, F_FLOAT, x> (rho, atom->nmax);
|
||||
cu_fp = new cCudaData<double, F_FLOAT, x> (fp, atom->nmax);
|
||||
Cuda_PairEAMCuda_Init(&cuda->shared_data,rdr,rdrho,nfrho,nrhor,nr,nrho,nz2r,
|
||||
cu_frho_spline->dev_data(),cu_rhor_spline->dev_data(),cu_z2r_spline->dev_data(),
|
||||
cu_rho->dev_data(),cu_fp->dev_data(),type2frho,type2z2r,type2rhor);
|
||||
}
|
||||
|
||||
|
||||
|
||||
if(eflag || vflag) ev_setup(eflag,vflag);
|
||||
if(eflag) cuda->cu_eng_vdwl->upload();
|
||||
if(vflag) cuda->cu_virial->upload();
|
||||
|
||||
Cuda_PairEAM1Cuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag,eflag_atom,vflag_atom);
|
||||
comm->forward_comm_pair(this);
|
||||
Cuda_PairEAM2Cuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag,eflag_atom,vflag_atom);
|
||||
|
||||
if(eflag) cuda->cu_eng_vdwl->download();
|
||||
if(vflag) cuda->cu_virial->download();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairEAMCuda::settings(int narg, char **arg)
|
||||
{
|
||||
PairEAM::settings(narg, arg);
|
||||
cuda->shared_data.pair.cut_global = (F_FLOAT) cutforcesq;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairEAMCuda::coeff(int narg, char **arg)
|
||||
{
|
||||
PairEAM::coeff(narg, arg);
|
||||
allocate();
|
||||
}
|
||||
|
||||
void PairEAMCuda::init_style()
|
||||
{
|
||||
MYDBG(printf("# CUDA PairEAMCuda::init_style start\n"); )
|
||||
// request regular or rRESPA neighbor lists
|
||||
file2array();
|
||||
array2spline();
|
||||
int irequest;
|
||||
|
||||
|
||||
irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->cudable = 1;
|
||||
|
||||
delete cu_rhor_spline;
|
||||
delete cu_z2r_spline;
|
||||
delete cu_frho_spline;
|
||||
|
||||
cu_rhor_spline = new cCudaData<double, F_FLOAT, xyz>((double*)rhor_spline,nrhor,nr+1,EAM_COEFF_LENGTH);
|
||||
cu_z2r_spline = new cCudaData<double, F_FLOAT, xyz>((double*)z2r_spline,nz2r,nr+1,EAM_COEFF_LENGTH);
|
||||
cu_frho_spline = new cCudaData<double, F_FLOAT, xyz>((double*)frho_spline,nfrho,nrho+1,EAM_COEFF_LENGTH);
|
||||
|
||||
cu_rhor_spline->upload();
|
||||
cu_z2r_spline->upload();
|
||||
cu_frho_spline->upload();
|
||||
|
||||
MYDBG(printf("# CUDA PairEAMCuda::init_style end\n"); )
|
||||
}
|
||||
|
||||
void PairEAMCuda::init_list(int id, NeighList *ptr)
|
||||
{
|
||||
MYDBG(printf("# CUDA PairEAMCuda::init_list\n");)
|
||||
PairEAM::init_list(id, ptr);
|
||||
#ifndef CUDA_USE_BINNING
|
||||
// right now we can only handle verlet (id 0), not respa
|
||||
if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
|
||||
// see Neighbor::init() for details on lammps lists' logic
|
||||
#endif
|
||||
MYDBG(printf("# CUDA PairEAMCuda::init_list end\n");)
|
||||
}
|
||||
|
||||
void PairEAMCuda::array2spline()
|
||||
{
|
||||
rdr = 1.0/dr;
|
||||
rdrho = 1.0/drho;
|
||||
|
||||
memory->destroy(frho_spline);
|
||||
memory->destroy(rhor_spline);
|
||||
memory->destroy(z2r_spline);
|
||||
|
||||
memory->create(frho_spline,nfrho,nrho+1,7,"pair:frho");
|
||||
memory->create(rhor_spline,nrhor,nr+1,7,"pair:rhor");
|
||||
memory->create(z2r_spline,nz2r,nr+1,7,"pair:z2r");
|
||||
|
||||
for (int i = 0; i < nfrho; i++){
|
||||
interpolate(nrho,drho,frho[i],frho_spline[i]);
|
||||
for(int j=0;j<nrho+1;j++)
|
||||
frho_spline[i][j][7]=frho_spline[i][j][3];
|
||||
}
|
||||
|
||||
for (int i = 0; i < nrhor; i++){
|
||||
interpolate(nr,dr,rhor[i],rhor_spline[i]);
|
||||
for(int j=0;j<nr+1;j++)
|
||||
rhor_spline[i][j][7]=rhor_spline[i][j][3];
|
||||
}
|
||||
|
||||
for (int i = 0; i < nz2r; i++){
|
||||
interpolate(nr,dr,z2r[i],z2r_spline[i]);
|
||||
for(int j=0;j<nr+1;j++)
|
||||
z2r_spline[i][j][7]=z2r_spline[i][j][3];
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int PairEAMCuda::pack_comm(int n, int *iswap, double *buf, int pbc_flag, int *pbc)
|
||||
{
|
||||
Cuda_PairEAMCuda_PackComm(&cuda->shared_data,n,*iswap,buf);
|
||||
if(sizeof(F_FLOAT)<sizeof(double)) return 1;
|
||||
else return 1;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairEAMCuda::unpack_comm(int n, int first, double *buf)
|
||||
{
|
||||
Cuda_PairEAMCuda_UnpackComm(&cuda->shared_data,n,first,buf,cu_fp->dev_data());
|
||||
}
|
||||
|
|
@ -0,0 +1,78 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(eam/cuda,PairEAMCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef PAIR_EAM_CUDA_H
|
||||
#define PAIR_EAM_CUDA_H
|
||||
|
||||
#include "cuda_data.h"
|
||||
#include "pair_eam.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairEAMCuda : public PairEAM
|
||||
{
|
||||
public:
|
||||
PairEAMCuda(class LAMMPS *);
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_list(int, class NeighList *);
|
||||
void init_style();
|
||||
void array2spline();
|
||||
int pack_comm(int n, int *iswap, double *buf, int pbc_flag, int *pbc);
|
||||
void unpack_comm(int n, int first, double *buf);
|
||||
protected:
|
||||
class Cuda *cuda;
|
||||
void allocate();
|
||||
bool allocated2;
|
||||
class CudaNeighList* cuda_neigh_list;
|
||||
cCudaData<double, F_FLOAT, x>* cu_rho;
|
||||
cCudaData<double, F_FLOAT, x>* cu_fp;
|
||||
cCudaData<double, F_FLOAT, xyz>* cu_rhor_spline;
|
||||
cCudaData<double, F_FLOAT, xyz>* cu_z2r_spline;
|
||||
cCudaData<double, F_FLOAT, xyz>* cu_frho_spline;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,335 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Tim Lau (MIT)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "pair_eam_fs_cuda.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MAXLINE 1024
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Construct the pair style: fetch the Cuda handle from the LAMMPS instance,
// report an error if it is missing, and set the one_coeff flag.
PairEAMFSCuda::PairEAMFSCuda(LAMMPS *lmp) : PairEAMCuda(lmp)
{
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  one_coeff = 1;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
set coeffs for one or more type pairs
|
||||
read EAM Finnis-Sinclair file
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairEAMFSCuda::coeff(int narg, char **arg)
|
||||
{
|
||||
int i,j;
|
||||
|
||||
if (!allocated) allocate();
|
||||
|
||||
if (narg != 3 + atom->ntypes)
|
||||
error->all("Incorrect args for pair coefficients");
|
||||
|
||||
// insure I,J args are * *
|
||||
|
||||
if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
|
||||
error->all("Incorrect args for pair coefficients");
|
||||
|
||||
// read EAM Finnis-Sinclair file
|
||||
|
||||
if (fs) {
|
||||
for (i = 0; i < fs->nelements; i++) delete [] fs->elements[i];
|
||||
delete [] fs->elements;
|
||||
delete [] fs->mass;
|
||||
memory->destroy(fs->frho);
|
||||
memory->destroy(fs->rhor);
|
||||
memory->destroy(fs->z2r);
|
||||
delete fs;
|
||||
}
|
||||
fs = new Fs();
|
||||
read_file(arg[2]);
|
||||
|
||||
// read args that map atom types to elements in potential file
|
||||
// map[i] = which element the Ith atom type is, -1 if NULL
|
||||
|
||||
for (i = 3; i < narg; i++) {
|
||||
if (strcmp(arg[i],"NULL") == 0) {
|
||||
map[i-2] = -1;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < fs->nelements; j++)
|
||||
if (strcmp(arg[i],fs->elements[j]) == 0) break;
|
||||
if (j < fs->nelements) map[i-2] = j;
|
||||
else error->all("No matching element in EAM potential file");
|
||||
}
|
||||
|
||||
// clear setflag since coeff() called once with I,J = * *
|
||||
|
||||
int n = atom->ntypes;
|
||||
for (i = 1; i <= n; i++)
|
||||
for (j = i; j <= n; j++)
|
||||
setflag[i][j] = 0;
|
||||
|
||||
// set setflag i,j for type pairs where both are mapped to elements
|
||||
// set mass of atom type if i = j
|
||||
|
||||
int count = 0;
|
||||
for (i = 1; i <= n; i++) {
|
||||
for (j = i; j <= n; j++) {
|
||||
if (map[i] >= 0 && map[j] >= 0) {
|
||||
setflag[i][j] = 1;
|
||||
if (i == j) atom->set_mass(i,fs->mass[map[i]]);
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (count == 0) error->all("Incorrect args for pair coefficients");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
read a multi-element DYNAMO setfl file
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairEAMFSCuda::read_file(char *filename)
|
||||
{
|
||||
Fs *file = fs;
|
||||
|
||||
// open potential file
|
||||
|
||||
int me = comm->me;
|
||||
FILE *fptr;
|
||||
char line[MAXLINE];
|
||||
|
||||
if (me == 0) {
|
||||
fptr = fopen(filename,"r");
|
||||
if (fptr == NULL) {
|
||||
char str[128];
|
||||
sprintf(str,"Cannot open EAM potential file %s",filename);
|
||||
error->one(str);
|
||||
}
|
||||
}
|
||||
|
||||
// read and broadcast header
|
||||
// extract element names from nelements line
|
||||
|
||||
int n;
|
||||
if (me == 0) {
|
||||
fgets(line,MAXLINE,fptr);
|
||||
fgets(line,MAXLINE,fptr);
|
||||
fgets(line,MAXLINE,fptr);
|
||||
fgets(line,MAXLINE,fptr);
|
||||
n = strlen(line) + 1;
|
||||
}
|
||||
MPI_Bcast(&n,1,MPI_INT,0,world);
|
||||
MPI_Bcast(line,n,MPI_CHAR,0,world);
|
||||
|
||||
sscanf(line,"%d",&file->nelements);
|
||||
int nwords = atom->count_words(line);
|
||||
if (nwords != file->nelements + 1)
|
||||
error->all("Incorrect element names in EAM potential file");
|
||||
|
||||
char **words = new char*[file->nelements+1];
|
||||
nwords = 0;
|
||||
char *first = strtok(line," \t\n\r\f");
|
||||
while (words[nwords++] = strtok(NULL," \t\n\r\f")) continue;
|
||||
|
||||
file->elements = new char*[file->nelements];
|
||||
for (int i = 0; i < file->nelements; i++) {
|
||||
n = strlen(words[i]) + 1;
|
||||
file->elements[i] = new char[n];
|
||||
strcpy(file->elements[i],words[i]);
|
||||
}
|
||||
delete [] words;
|
||||
|
||||
if (me == 0) {
|
||||
fgets(line,MAXLINE,fptr);
|
||||
sscanf(line,"%d %lg %d %lg %lg",
|
||||
&file->nrho,&file->drho,&file->nr,&file->dr,&file->cut);
|
||||
}
|
||||
|
||||
MPI_Bcast(&file->nrho,1,MPI_INT,0,world);
|
||||
MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&file->nr,1,MPI_INT,0,world);
|
||||
MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world);
|
||||
|
||||
file->mass = new double[file->nelements];
|
||||
memory->create(file->frho,file->nelements,file->nrho+1,
|
||||
"pair:frho");
|
||||
memory->create(file->rhor,file->nelements,file->nelements,
|
||||
file->nr+1,"pair:rhor");
|
||||
memory->create(file->z2r,file->nelements,file->nelements,
|
||||
file->nr+1,"pair:z2r");
|
||||
int i,j,tmp;
|
||||
for (i = 0; i < file->nelements; i++) {
|
||||
if (me == 0) {
|
||||
fgets(line,MAXLINE,fptr);
|
||||
sscanf(line,"%d %lg",&tmp,&file->mass[i]);
|
||||
}
|
||||
MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world);
|
||||
|
||||
if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]);
|
||||
MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world);
|
||||
|
||||
for (j = 0; j < file->nelements; j++) {
|
||||
if (me == 0) grab(fptr,file->nr,&file->rhor[i][j][1]);
|
||||
MPI_Bcast(&file->rhor[i][j][1],file->nr,MPI_DOUBLE,0,world);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < file->nelements; i++)
|
||||
for (j = 0; j <= i; j++) {
|
||||
if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]);
|
||||
MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world);
|
||||
}
|
||||
|
||||
// close the potential file
|
||||
|
||||
if (me == 0) fclose(fptr);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
copy read-in setfl potential to standard array format
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairEAMFSCuda::file2array()
|
||||
{
|
||||
int i,j,m,n;
|
||||
int ntypes = atom->ntypes;
|
||||
|
||||
// set function params directly from fs file
|
||||
|
||||
nrho = fs->nrho;
|
||||
nr = fs->nr;
|
||||
drho = fs->drho;
|
||||
dr = fs->dr;
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// setup frho arrays
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
// allocate frho arrays
|
||||
// nfrho = # of fs elements + 1 for zero array
|
||||
|
||||
nfrho = fs->nelements + 1;
|
||||
memory->destroy(frho);
|
||||
memory->create(frho,nfrho,nrho+1,"pair:frho");
|
||||
|
||||
// copy each element's frho to global frho
|
||||
|
||||
for (i = 0; i < fs->nelements; i++)
|
||||
for (m = 1; m <= nrho; m++) frho[i][m] = fs->frho[i][m];
|
||||
|
||||
// add extra frho of zeroes for non-EAM types to point to (pair hybrid)
|
||||
// this is necessary b/c fp is still computed for non-EAM atoms
|
||||
|
||||
for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0;
|
||||
|
||||
// type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
|
||||
// if atom type doesn't point to element (non-EAM atom in pair hybrid)
|
||||
// then map it to last frho array of zeroes
|
||||
|
||||
for (i = 1; i <= ntypes; i++)
|
||||
if (map[i] >= 0) type2frho[i] = map[i];
|
||||
else type2frho[i] = nfrho-1;
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// setup rhor arrays
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
// allocate rhor arrays
|
||||
// nrhor = square of # of fs elements
|
||||
|
||||
nrhor = fs->nelements * fs->nelements;
|
||||
memory->destroy(rhor);
|
||||
memory->create(rhor,nrhor,nr+1,"pair:rhor");
|
||||
|
||||
// copy each element pair rhor to global rhor
|
||||
|
||||
n = 0;
|
||||
for (i = 0; i < fs->nelements; i++)
|
||||
for (j = 0; j < fs->nelements; j++) {
|
||||
for (m = 1; m <= nr; m++) rhor[n][m] = fs->rhor[i][j][m];
|
||||
n++;
|
||||
}
|
||||
|
||||
// type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to
|
||||
// for fs files, there is a full NxN set of rhor arrays
|
||||
// OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
|
||||
|
||||
for (i = 1; i <= ntypes; i++)
|
||||
for (j = 1; j <= ntypes; j++)
|
||||
type2rhor[i][j] = map[i] * fs->nelements + map[j];
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// setup z2r arrays
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
// allocate z2r arrays
|
||||
// nz2r = N*(N+1)/2 where N = # of fs elements
|
||||
|
||||
nz2r = fs->nelements * (fs->nelements+1) / 2;
|
||||
memory->destroy(z2r);
|
||||
memory->create(z2r,nz2r,nr+1,"pair:z2r");
|
||||
|
||||
// copy each element pair z2r to global z2r, only for I >= J
|
||||
|
||||
n = 0;
|
||||
for (i = 0; i < fs->nelements; i++)
|
||||
for (j = 0; j <= i; j++) {
|
||||
for (m = 1; m <= nr; m++) z2r[n][m] = fs->z2r[i][j][m];
|
||||
n++;
|
||||
}
|
||||
|
||||
// type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to
|
||||
// set of z2r arrays only fill lower triangular Nelement matrix
|
||||
// value = n = sum over rows of lower-triangular matrix until reach irow,icol
|
||||
// swap indices when irow < icol to stay lower triangular
|
||||
// if map = -1 (non-EAM atom in pair hybrid):
|
||||
// type2z2r is not used by non-opt
|
||||
// but set type2z2r to 0 since accessed by opt
|
||||
|
||||
int irow,icol;
|
||||
for (i = 1; i <= ntypes; i++) {
|
||||
for (j = 1; j <= ntypes; j++) {
|
||||
irow = map[i];
|
||||
icol = map[j];
|
||||
if (irow == -1 || icol == -1) {
|
||||
type2z2r[i][j] = 0;
|
||||
continue;
|
||||
}
|
||||
if (irow < icol) {
|
||||
irow = map[j];
|
||||
icol = map[i];
|
||||
}
|
||||
n = 0;
|
||||
for (m = 0; m < irow; m++) n += m + 1;
|
||||
n += icol;
|
||||
type2z2r[i][j] = n;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(eam/fs/cuda,PairEAMFSCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_EAM_FS_CUDA_H
|
||||
#define LMP_PAIR_EAM_FS_CUDA_H
|
||||
|
||||
#include "pair_eam_cuda.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// use virtual public since this class is parent in multiple inheritance
|
||||
|
||||
// Pair style registered as "eam/fs/cuda" (see the PairStyle entry in this
// header).  Inherits virtually from PairEAMCuda.
class PairEAMFSCuda : virtual public PairEAMCuda
{
 public:
  PairEAMFSCuda(class LAMMPS *);
  virtual ~PairEAMFSCuda() {}

  void coeff(int, char **);

 protected:
  // NOTE(review): PairEAMCuda already declares a protected member named
  // `cuda`; this declaration hides it inside PairEAMFSCuda -- confirm that
  // a second pointer is really intended.
  class Cuda *cuda;

  void read_file(char *);
  void file2array();
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,247 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Paul Crozier (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "pair_gran_hooke_cuda.h"
|
||||
#include "pair_gran_hooke_cuda_cu.h"
|
||||
#include "cuda_data.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "modify.h"
|
||||
#include "fix_pour.h"
|
||||
#include "cuda_neigh_list.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Construct the pair style: fetch the Cuda handle from the LAMMPS instance,
// report an error if it is missing, flag the pair force as cudable and
// call setSystemParams() on the handle.
PairGranHookeCuda::PairGranHookeCuda(LAMMPS *lmp) : PairGranHooke(lmp)
{
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  allocated2 = false;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remember pointer to arrays in cuda shared data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairGranHookeCuda::allocate()
|
||||
{
|
||||
if(! allocated) PairGranHooke::allocate();
|
||||
if(! allocated2)
|
||||
{
|
||||
allocated2 = true;
|
||||
int n = atom->ntypes;
|
||||
cuda->shared_data.pair.cutsq = cutsq;
|
||||
memory->create(cuda->shared_data.pair.coeff1,n+1,n+1,
|
||||
"pair:cuda_coeff1");
|
||||
memory->create(cuda->shared_data.pair.coeff2,
|
||||
n+1,n+1,"pair:cuda_coeff2");
|
||||
cuda->shared_data.pair.coeff1[0][0]=kn;
|
||||
cuda->shared_data.pair.coeff1[0][1]=kt;
|
||||
cuda->shared_data.pair.coeff1[1][0]=gamman;
|
||||
cuda->shared_data.pair.coeff1[1][1]=gammat;
|
||||
cuda->shared_data.pair.coeff2[0][0]=xmu;
|
||||
cuda->shared_data.pair.coeff2[0][1]=dampflag;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairGranHookeCuda::compute(int eflag, int vflag)
|
||||
{
|
||||
cuda->shared_data.pair.use_block_per_atom = 0;
|
||||
//cuda->cu_debugdata->memset_device(0);
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
if(eflag) cuda->cu_eng_vdwl->upload();
|
||||
if(vflag) cuda->cu_virial->upload();
|
||||
|
||||
Cuda_PairGranHookeCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
|
||||
|
||||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
{
|
||||
if(eflag) cuda->cu_eng_vdwl->download();
|
||||
if(vflag) cuda->cu_virial->download();
|
||||
}
|
||||
//cuda->cu_debugdata->download();
|
||||
//printf("%lf %lf %lf %lf %lf %lf\n",1.0e-6*cuda->debugdata[0],1.0e-6*cuda->debugdata[1],1.0e-6*cuda->debugdata[2],1.0e-6*cuda->debugdata[3],1.0e-6*cuda->debugdata[4],1.0e-6*cuda->debugdata[5]);
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairGranHookeCuda::settings(int narg, char **arg)
|
||||
{
|
||||
PairGranHooke::settings(narg, arg);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairGranHookeCuda::coeff(int narg, char **arg)
|
||||
{
|
||||
PairGranHooke::coeff(narg, arg);
|
||||
allocate();
|
||||
}
|
||||
|
||||
void PairGranHookeCuda::init_style()
|
||||
{
|
||||
int i;
|
||||
MYDBG(printf("# CUDA PairGranHookeCuda::init_style start\n"); )
|
||||
// request regular or rRESPA neighbor lists
|
||||
|
||||
int irequest;
|
||||
|
||||
if (update->whichflag == 0 && strcmp(update->integrate_style,"respa") == 0) {
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->gran = 1;
|
||||
neighbor->requests[irequest]->cudable = 1;
|
||||
//neighbor->style=0; //0=NSQ neighboring
|
||||
}
|
||||
|
||||
if (!atom->radius_flag || !atom->omega_flag || !atom->torque_flag)
|
||||
error->all("Pair granular requires atom attributes radius, omega, torque");
|
||||
if (comm->ghost_velocity == 0)
|
||||
error->all("Pair granular requires ghost atoms store velocity");
|
||||
|
||||
// need a half neigh list and optionally a granular history neigh list
|
||||
|
||||
dt = update->dt;
|
||||
|
||||
|
||||
|
||||
// check for Fix freeze and set freeze_group_bit
|
||||
|
||||
for (i = 0; i < modify->nfix; i++)
|
||||
if (strcmp(modify->fix[i]->style,"freeze") == 0) break;
|
||||
if (i < modify->nfix) freeze_group_bit = modify->fix[i]->groupbit;
|
||||
else freeze_group_bit = 0;
|
||||
|
||||
cuda->shared_data.pair.freeze_group_bit=freeze_group_bit;
|
||||
// check for Fix pour and set pour_type and pour_maxdiam
|
||||
|
||||
int pour_type = 0;
|
||||
double pour_maxrad = 0.0;
|
||||
for (i = 0; i < modify->nfix; i++)
|
||||
if (strcmp(modify->fix[i]->style,"pour") == 0) break;
|
||||
if (i < modify->nfix) {
|
||||
pour_type = ((FixPour *) modify->fix[i])->ntype;
|
||||
pour_maxrad = ((FixPour *) modify->fix[i])->radius_hi;
|
||||
}
|
||||
|
||||
// set maxrad_dynamic and maxrad_frozen for each type
|
||||
// include future Fix pour particles as dynamic
|
||||
|
||||
for (i = 1; i <= atom->ntypes; i++)
|
||||
onerad_dynamic[i] = onerad_frozen[i] = 0.0;
|
||||
if (pour_type) onerad_dynamic[pour_type] = pour_maxrad;
|
||||
|
||||
double *radius = atom->radius;
|
||||
int *mask = atom->mask;
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
for (i = 0; i < nlocal; i++){
|
||||
if (mask[i] & freeze_group_bit)
|
||||
onerad_frozen[type[i]] = MAX(onerad_frozen[type[i]],radius[i]);
|
||||
else
|
||||
onerad_dynamic[type[i]] = MAX(onerad_dynamic[type[i]],radius[i]);
|
||||
}
|
||||
|
||||
MPI_Allreduce(&onerad_dynamic[1],&maxrad_dynamic[1],atom->ntypes,
|
||||
MPI_DOUBLE,MPI_MAX,world);
|
||||
MPI_Allreduce(&onerad_frozen[1],&maxrad_frozen[1],atom->ntypes,
|
||||
MPI_DOUBLE,MPI_MAX,world);
|
||||
|
||||
MYDBG(printf("# CUDA PairGranHookeCuda::init_style end\n"); )
|
||||
}
|
||||
|
||||
void PairGranHookeCuda::init_list(int id, NeighList *ptr)
|
||||
{
|
||||
MYDBG(printf("# CUDA PairGranHookeCuda::init_list\n");)
|
||||
PairGranHooke::init_list(id, ptr);
|
||||
#ifndef CUDA_USE_BINNING
|
||||
// right now we can only handle verlet (id 0), not respa
|
||||
if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
|
||||
// see Neighbor::init() for details on lammps lists' logic
|
||||
#endif
|
||||
MYDBG(printf("# CUDA PairGranHookeCuda::init_list end\n");)
|
||||
}
|
||||
|
||||
void PairGranHookeCuda::ev_setup(int eflag, int vflag)
|
||||
{
|
||||
int maxeatomold=maxeatom;
|
||||
PairGranHooke::ev_setup(eflag,vflag);
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(gran/hooke/cuda,PairGranHookeCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef PAIR_GRAN_HOOKE_CUDA_H
|
||||
#define PAIR_GRAN_HOOKE_CUDA_H
|
||||
|
||||
#include "pair_gran_hooke.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairGranHookeCuda : public PairGranHooke
|
||||
{
|
||||
public:
|
||||
PairGranHookeCuda(class LAMMPS *);
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_list(int, class NeighList *);
|
||||
void init_style();
|
||||
void ev_setup(int eflag, int vflag);
|
||||
protected:
|
||||
class Cuda *cuda;
|
||||
void allocate();
|
||||
bool allocated2;
|
||||
class CudaNeighList* cuda_neigh_list;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,184 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Paul Crozier (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "pair_lj96_cut_cuda.h"
|
||||
#include "pair_lj96_cut_cuda_cu.h"
|
||||
#include "cuda_data.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "cuda_neigh_list.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Construct the pair style: fetch the Cuda handle from the LAMMPS instance,
// report an error if it is missing, flag the pair force as cudable and
// call setSystemParams() on the handle.
PairLJ96CutCuda::PairLJ96CutCuda(LAMMPS *lmp) : PairLJ96Cut(lmp)
{
  cuda = lmp->cuda;
  if (!cuda) {
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");
  }

  allocated2 = false;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remember pointer to arrays in cuda shared data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairLJ96CutCuda::allocate()
|
||||
{
|
||||
if(! allocated) PairLJ96Cut::allocate();
|
||||
if(! allocated2)
|
||||
{
|
||||
allocated2 = true;
|
||||
cuda->shared_data.pair.cut = cut;
|
||||
cuda->shared_data.pair.coeff1 = lj1;
|
||||
cuda->shared_data.pair.coeff2 = lj2;
|
||||
cuda->shared_data.pair.coeff3 = lj3;
|
||||
cuda->shared_data.pair.coeff4 = lj4;
|
||||
cuda->shared_data.pair.offset = offset;
|
||||
cuda->shared_data.pair.special_lj = force->special_lj;
|
||||
cuda->shared_data.pair.special_coul = force->special_coul;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJ96CutCuda::compute(int eflag, int vflag)
|
||||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
if(eflag) cuda->cu_eng_vdwl->upload();
|
||||
if(vflag) cuda->cu_virial->upload();
|
||||
|
||||
Cuda_PairLJ96CutCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
|
||||
|
||||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
{
|
||||
if(eflag) cuda->cu_eng_vdwl->download();
|
||||
if(vflag) cuda->cu_virial->download();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJ96CutCuda::settings(int narg, char **arg)
|
||||
{
|
||||
PairLJ96Cut::settings(narg, arg);
|
||||
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_global;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJ96CutCuda::coeff(int narg, char **arg)
|
||||
{
|
||||
PairLJ96Cut::coeff(narg, arg);
|
||||
allocate();
|
||||
}
|
||||
|
||||
void PairLJ96CutCuda::init_style()
|
||||
{
|
||||
MYDBG(printf("# CUDA PairLJ96CutCuda::init_style start\n"); )
|
||||
// request regular or rRESPA neighbor lists
|
||||
|
||||
int irequest;
|
||||
|
||||
if (update->whichflag == 0 && strcmp(update->integrate_style,"respa") == 0) {
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->cudable = 1;
|
||||
//neighbor->style=0; //0=NSQ neighboring
|
||||
}
|
||||
|
||||
|
||||
cut_respa = NULL;
|
||||
MYDBG(printf("# CUDA PairLJ96CutCuda::init_style end\n"); )
|
||||
}
|
||||
|
||||
void PairLJ96CutCuda::init_list(int id, NeighList *ptr)
|
||||
{
|
||||
MYDBG(printf("# CUDA PairLJ96CutCuda::init_list\n");)
|
||||
PairLJ96Cut::init_list(id, ptr);
|
||||
#ifndef CUDA_USE_BINNING
|
||||
// right now we can only handle verlet (id 0), not respa
|
||||
if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
|
||||
// see Neighbor::init() for details on lammps lists' logic
|
||||
#endif
|
||||
MYDBG(printf("# CUDA PairLJ96CutCuda::init_list end\n");)
|
||||
}
|
||||
|
||||
void PairLJ96CutCuda::ev_setup(int eflag, int vflag)
|
||||
{
|
||||
int maxeatomold=maxeatom;
|
||||
PairLJ96Cut::ev_setup(eflag,vflag);
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(lj96/cut/cuda,PairLJ96CutCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef PAIR_LJ96_CUT_CUDA_H
|
||||
#define PAIR_LJ96_CUT_CUDA_H
|
||||
|
||||
#include "pair_lj96_cut.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairLJ96CutCuda : public PairLJ96Cut
|
||||
{
|
||||
public:
|
||||
PairLJ96CutCuda(class LAMMPS *);
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_list(int, class NeighList *);
|
||||
void init_style();
|
||||
void ev_setup(int eflag, int vflag);
|
||||
protected:
|
||||
class Cuda *cuda;
|
||||
void allocate();
|
||||
bool allocated2;
|
||||
class CudaNeighList* cuda_neigh_list;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,193 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
Contributing author: Paul Crozier (SNL)
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "pair_lj_charmm_coul_charmm_cuda.h"
|
||||
#include "pair_lj_charmm_coul_charmm_cuda_cu.h"
|
||||
#include "cuda_data.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "kspace.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "cuda_neigh_list.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
   fetch the Cuda object from the LAMMPS instance (error if absent),
   put all pointer members into a defined state and flag the pair style
   as cudable in the shared data
------------------------------------------------------------------------- */
PairLJCharmmCoulCharmmCuda::PairLJCharmmCoulCharmmCuda(LAMMPS *lmp) : PairLJCharmmCoulCharmm(lmp)
{
  cuda = lmp->cuda;
  if(cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  allocated2 = false;

  // these are only assigned later (init_list() / allocate()); start from
  // NULL so that a premature use is not a read of an indeterminate pointer
  cuda_neigh_list = NULL;
  cu_lj1_gm = NULL;
  cu_lj2_gm = NULL;
  cu_lj3_gm = NULL;
  cu_lj4_gm = NULL;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->shared_data.pair.use_block_per_atom = 0;
  cuda->setSystemParams();
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remember pointer to arrays in cuda shared data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCharmmCoulCharmmCuda::allocate()
|
||||
{
|
||||
if(! allocated) PairLJCharmmCoulCharmm::allocate();
|
||||
if(! allocated2)
|
||||
{
|
||||
allocated2 = true;
|
||||
cuda->shared_data.pair.coeff1 = lj1;
|
||||
cuda->shared_data.pair.coeff2 = lj2;
|
||||
cuda->shared_data.pair.coeff3 = lj3;
|
||||
cuda->shared_data.pair.coeff4 = lj4;
|
||||
cuda->shared_data.pair.special_lj = force->special_lj;
|
||||
cuda->shared_data.pair.special_coul = force->special_coul;
|
||||
cu_lj1_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm, (atom->ntypes+1)*(atom->ntypes+1));
|
||||
cu_lj2_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm, (atom->ntypes+1)*(atom->ntypes+1));
|
||||
cu_lj3_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm, (atom->ntypes+1)*(atom->ntypes+1));
|
||||
cu_lj4_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm, (atom->ntypes+1)*(atom->ntypes+1));
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCharmmCoulCharmmCuda::compute(int eflag, int vflag)
|
||||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
{
|
||||
if(eflag) cuda->cu_eng_vdwl->upload();
|
||||
if(eflag) cuda->cu_eng_coul->upload();
|
||||
if(vflag) cuda->cu_virial->upload();
|
||||
}
|
||||
|
||||
Cuda_PairLJCharmmCoulCharmmCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom,denom_lj,cut_coul_innersq,denom_coul);
|
||||
|
||||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
{
|
||||
if(eflag) cuda->cu_eng_vdwl->download();
|
||||
if(eflag) cuda->cu_eng_coul->download();
|
||||
if(vflag) cuda->cu_virial->download();
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCharmmCoulCharmmCuda::settings(int narg, char **arg)
|
||||
{
|
||||
PairLJCharmmCoulCharmm::settings(narg, arg);
|
||||
cuda->shared_data.pair.cut_global = (X_FLOAT) cut_lj;
|
||||
cuda->shared_data.pair.cut_coulsq_global = (X_FLOAT) cut_coulsq;
|
||||
cuda->shared_data.pair.cut_inner_global = (F_FLOAT) cut_lj_inner;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCharmmCoulCharmmCuda::coeff(int narg, char **arg)
|
||||
{
|
||||
PairLJCharmmCoulCharmm::coeff(narg, arg);
|
||||
allocate();
|
||||
}
|
||||
|
||||
void PairLJCharmmCoulCharmmCuda::init_style()
|
||||
{
|
||||
if (!atom->q_flag)
|
||||
error->all("Pair style lj/charmm/coul/long requires atom attribute q");
|
||||
// request regular or rRESPA neighbor lists
|
||||
|
||||
if(atom->molecular)
|
||||
{
|
||||
cuda->shared_data.pair.collect_forces_later = 1;
|
||||
}
|
||||
|
||||
int irequest;
|
||||
|
||||
irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->cudable = 1;
|
||||
|
||||
if (cut_lj_inner >= cut_lj || cut_coul_inner >= cut_coul)
|
||||
error->all("Pair inner cutoff >= Pair outer cutoff");
|
||||
|
||||
cut_lj_innersq = cut_lj_inner * cut_lj_inner;
|
||||
cut_ljsq = cut_lj * cut_lj;
|
||||
cut_coul_innersq = cut_coul_inner * cut_coul_inner;
|
||||
cut_coulsq = cut_coul * cut_coul;
|
||||
cut_bothsq = MAX(cut_ljsq,cut_coulsq);
|
||||
|
||||
denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) *
|
||||
(cut_ljsq-cut_lj_innersq);
|
||||
denom_coul = (cut_coulsq-cut_coul_innersq) * (cut_coulsq-cut_coul_innersq) *
|
||||
(cut_coulsq-cut_coul_innersq);
|
||||
|
||||
cut_coulsq = cut_coul * cut_coul;
|
||||
|
||||
cuda->shared_data.pair.cut_coulsq_global=cut_coulsq;
|
||||
|
||||
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
|
||||
}
|
||||
|
||||
void PairLJCharmmCoulCharmmCuda::init_list(int id, NeighList *ptr)
|
||||
{
|
||||
MYDBG(printf("# CUDA PairLJCharmmCoulCharmmCuda::init_list\n");)
|
||||
PairLJCharmmCoulCharmm::init_list(id, ptr);
|
||||
#ifndef CUDA_USE_BINNING
|
||||
// right now we can only handle verlet (id 0), not respa
|
||||
if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
|
||||
// see Neighbor::init() for details on lammps lists' logic
|
||||
#endif
|
||||
MYDBG(printf("# CUDA PairLJCharmmCoulCharmmCuda::init_list end\n");)
|
||||
}
|
||||
|
||||
void PairLJCharmmCoulCharmmCuda::ev_setup(int eflag, int vflag)
|
||||
{
|
||||
int maxeatomold=maxeatom;
|
||||
PairLJCharmmCoulCharmm::ev_setup(eflag,vflag);
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(lj/charmm/coul/charmm/cuda,PairLJCharmmCoulCharmmCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_LJ_CHARMM_COUL_CHARMM_CUDA_H
|
||||
#define LMP_PAIR_LJ_CHARMM_COUL_CHARMM_CUDA_H
|
||||
|
||||
#include "pair_lj_charmm_coul_charmm.h"
|
||||
#include "cuda_data.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairLJCharmmCoulCharmmCuda : public PairLJCharmmCoulCharmm
|
||||
{
|
||||
public:
|
||||
PairLJCharmmCoulCharmmCuda(class LAMMPS *);
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_list(int, class NeighList *);
|
||||
void init_style();
|
||||
void ev_setup(int eflag, int vflag);
|
||||
protected:
|
||||
class Cuda *cuda;
|
||||
void allocate();
|
||||
bool allocated2;
|
||||
class CudaNeighList* cuda_neigh_list;
|
||||
cCudaData<double , F_FLOAT , x >* cu_lj1_gm;
|
||||
cCudaData<double , F_FLOAT , x >* cu_lj2_gm;
|
||||
cCudaData<double , F_FLOAT , x >* cu_lj3_gm;
|
||||
cCudaData<double , F_FLOAT , x >* cu_lj4_gm;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,188 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
Contributing author: Paul Crozier (SNL)
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "pair_lj_charmm_coul_charmm_implicit_cuda.h"
|
||||
#include "pair_lj_charmm_coul_charmm_implicit_cuda_cu.h"
|
||||
#include "cuda_data.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "kspace.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "cuda_neigh_list.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
   fetch the Cuda object from the LAMMPS instance (error if absent),
   put all pointer members into a defined state and flag the pair style
   as cudable, with deferred force collection, in the shared data
------------------------------------------------------------------------- */
PairLJCharmmCoulCharmmImplicitCuda::PairLJCharmmCoulCharmmImplicitCuda(LAMMPS *lmp) : PairLJCharmmCoulCharmmImplicit(lmp)
{
  cuda = lmp->cuda;
  if(cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  allocated2 = false;

  // these are only assigned later (init_list() / allocate()); start from
  // NULL so that a premature use is not a read of an indeterminate pointer
  cuda_neigh_list = NULL;
  cu_lj1_gm = NULL;
  cu_lj2_gm = NULL;
  cu_lj3_gm = NULL;
  cu_lj4_gm = NULL;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->shared_data.pair.collect_forces_later = 1;
  cuda->setSystemParams();
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remember pointer to arrays in cuda shared data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCharmmCoulCharmmImplicitCuda::allocate()
|
||||
{
|
||||
if(! allocated) PairLJCharmmCoulCharmmImplicit::allocate();
|
||||
if(! allocated2)
|
||||
{
|
||||
allocated2 = true;
|
||||
cuda->shared_data.pair.coeff1 = lj1;
|
||||
cuda->shared_data.pair.coeff2 = lj2;
|
||||
cuda->shared_data.pair.coeff3 = lj3;
|
||||
cuda->shared_data.pair.coeff4 = lj4;
|
||||
cuda->shared_data.pair.special_lj = force->special_lj;
|
||||
cuda->shared_data.pair.special_coul = force->special_coul;
|
||||
cu_lj1_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm, (atom->ntypes+1)*(atom->ntypes+1));
|
||||
cu_lj2_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm, (atom->ntypes+1)*(atom->ntypes+1));
|
||||
cu_lj3_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm, (atom->ntypes+1)*(atom->ntypes+1));
|
||||
cu_lj4_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm, (atom->ntypes+1)*(atom->ntypes+1));
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCharmmCoulCharmmImplicitCuda::compute(int eflag, int vflag)
|
||||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
{
|
||||
if(eflag) cuda->cu_eng_vdwl->upload();
|
||||
if(eflag) cuda->cu_eng_coul->upload();
|
||||
if(vflag) cuda->cu_virial->upload();
|
||||
}
|
||||
|
||||
Cuda_PairLJCharmmCoulCharmmImplicitCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom,denom_lj,cut_coul_innersq,denom_coul);
|
||||
|
||||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
{
|
||||
if(eflag) cuda->cu_eng_vdwl->download();
|
||||
if(eflag) cuda->cu_eng_coul->download();
|
||||
if(vflag) cuda->cu_virial->download();
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCharmmCoulCharmmImplicitCuda::settings(int narg, char **arg)
|
||||
{
|
||||
PairLJCharmmCoulCharmmImplicit::settings(narg, arg);
|
||||
cuda->shared_data.pair.cut_global = (X_FLOAT) cut_lj;
|
||||
cuda->shared_data.pair.cut_coulsq_global = (X_FLOAT) cut_coulsq;
|
||||
cuda->shared_data.pair.cut_inner_global = (F_FLOAT) cut_lj_inner;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCharmmCoulCharmmImplicitCuda::coeff(int narg, char **arg)
|
||||
{
|
||||
PairLJCharmmCoulCharmmImplicit::coeff(narg, arg);
|
||||
allocate();
|
||||
}
|
||||
|
||||
void PairLJCharmmCoulCharmmImplicitCuda::init_style()
|
||||
{
|
||||
if (!atom->q_flag)
|
||||
error->all("Pair style lj/charmm/coul/long requires atom attribute q");
|
||||
// request regular or rRESPA neighbor lists
|
||||
|
||||
int irequest;
|
||||
|
||||
irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->cudable = 1;
|
||||
|
||||
if (cut_lj_inner >= cut_lj || cut_coul_inner >= cut_coul)
|
||||
error->all("Pair inner cutoff >= Pair outer cutoff");
|
||||
|
||||
cut_lj_innersq = cut_lj_inner * cut_lj_inner;
|
||||
cut_ljsq = cut_lj * cut_lj;
|
||||
cut_coul_innersq = cut_coul_inner * cut_coul_inner;
|
||||
cut_coulsq = cut_coul * cut_coul;
|
||||
cut_bothsq = MAX(cut_ljsq,cut_coulsq);
|
||||
|
||||
denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) *
|
||||
(cut_ljsq-cut_lj_innersq);
|
||||
denom_coul = (cut_coulsq-cut_coul_innersq) * (cut_coulsq-cut_coul_innersq) *
|
||||
(cut_coulsq-cut_coul_innersq);
|
||||
|
||||
cut_coulsq = cut_coul * cut_coul;
|
||||
|
||||
cuda->shared_data.pair.cut_coulsq_global=cut_coulsq;
|
||||
|
||||
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
|
||||
}
|
||||
|
||||
void PairLJCharmmCoulCharmmImplicitCuda::init_list(int id, NeighList *ptr)
|
||||
{
|
||||
MYDBG(printf("# CUDA PairLJCharmmCoulCharmmImplicitCuda::init_list\n");)
|
||||
PairLJCharmmCoulCharmmImplicit::init_list(id, ptr);
|
||||
#ifndef CUDA_USE_BINNING
|
||||
// right now we can only handle verlet (id 0), not respa
|
||||
if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
|
||||
// see Neighbor::init() for details on lammps lists' logic
|
||||
#endif
|
||||
MYDBG(printf("# CUDA PairLJCharmmCoulCharmmImplicitCuda::init_list end\n");)
|
||||
}
|
||||
|
||||
void PairLJCharmmCoulCharmmImplicitCuda::ev_setup(int eflag, int vflag)
|
||||
{
|
||||
int maxeatomold=maxeatom;
|
||||
PairLJCharmmCoulCharmmImplicit::ev_setup(eflag,vflag);
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(lj/charmm/coul/charmm/implicit/cuda,PairLJCharmmCoulCharmmImplicitCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_LJ_CHARMM_COUL_CHARMM_IMPLICIT_CUDA_H
|
||||
#define LMP_PAIR_LJ_CHARMM_COUL_CHARMM_IMPLICIT_CUDA_H
|
||||
|
||||
#include "pair_lj_charmm_coul_charmm_implicit.h"
|
||||
#include "cuda_data.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairLJCharmmCoulCharmmImplicitCuda : public PairLJCharmmCoulCharmmImplicit
|
||||
{
|
||||
public:
|
||||
PairLJCharmmCoulCharmmImplicitCuda(class LAMMPS *);
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_list(int, class NeighList *);
|
||||
void init_style();
|
||||
void ev_setup(int eflag, int vflag);
|
||||
protected:
|
||||
class Cuda *cuda;
|
||||
void allocate();
|
||||
bool allocated2;
|
||||
class CudaNeighList* cuda_neigh_list;
|
||||
cCudaData<double , F_FLOAT , x >* cu_lj1_gm;
|
||||
cCudaData<double , F_FLOAT , x >* cu_lj2_gm;
|
||||
cCudaData<double , F_FLOAT , x >* cu_lj3_gm;
|
||||
cCudaData<double , F_FLOAT , x >* cu_lj4_gm;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,201 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
Contributing author: Paul Crozier (SNL)
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "pair_lj_charmm_coul_long_cuda.h"
|
||||
#include "pair_lj_charmm_coul_long_cuda_cu.h"
|
||||
#include "cuda_data.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "kspace.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "cuda_neigh_list.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
#define EWALD_F 1.12837917
|
||||
#define EWALD_P 0.3275911
|
||||
#define A1 0.254829592
|
||||
#define A2 -0.284496736
|
||||
#define A3 1.421413741
|
||||
#define A4 -1.453152027
|
||||
#define A5 1.061405429
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
   fetch the Cuda object from the LAMMPS instance (error if absent),
   put all pointer members into a defined state and flag the pair style
   as cudable, with deferred force collection, in the shared data
------------------------------------------------------------------------- */
PairLJCharmmCoulLongCuda::PairLJCharmmCoulLongCuda(LAMMPS *lmp) : PairLJCharmmCoulLong(lmp)
{
  cuda = lmp->cuda;
  if(cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  allocated2 = false;

  // these are only assigned later (init_list() / allocate()); start from
  // NULL so that a premature use is not a read of an indeterminate pointer
  cuda_neigh_list = NULL;
  cu_lj1_gm = NULL;
  cu_lj2_gm = NULL;
  cu_lj3_gm = NULL;
  cu_lj4_gm = NULL;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->shared_data.pair.collect_forces_later = 1;
  cuda->setSystemParams();
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remember pointer to arrays in cuda shared data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCharmmCoulLongCuda::allocate()
|
||||
{
|
||||
if(! allocated) PairLJCharmmCoulLong::allocate();
|
||||
if(! allocated2)
|
||||
{
|
||||
allocated2 = true;
|
||||
//cuda->shared_data.pair.cut = cut_lj;
|
||||
cuda->shared_data.pair.coeff1 = lj1;
|
||||
cuda->shared_data.pair.coeff2 = lj2;
|
||||
cuda->shared_data.pair.coeff3 = lj3;
|
||||
cuda->shared_data.pair.coeff4 = lj4;
|
||||
cuda->shared_data.pair.offset = offset;
|
||||
cuda->shared_data.pair.special_lj = force->special_lj;
|
||||
cuda->shared_data.pair.special_coul = force->special_coul;
|
||||
cu_lj1_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm, (atom->ntypes+1)*(atom->ntypes+1));
|
||||
cu_lj2_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm, (atom->ntypes+1)*(atom->ntypes+1));
|
||||
cu_lj3_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm, (atom->ntypes+1)*(atom->ntypes+1));
|
||||
cu_lj4_gm = new cCudaData<double, F_FLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm, (atom->ntypes+1)*(atom->ntypes+1));
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCharmmCoulLongCuda::compute(int eflag, int vflag)
|
||||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
{
|
||||
if(eflag) cuda->cu_eng_vdwl->upload();
|
||||
if(eflag) cuda->cu_eng_coul->upload();
|
||||
if(vflag) cuda->cu_virial->upload();
|
||||
}
|
||||
|
||||
Cuda_PairLJCharmmCoulLongCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom,denom_lj);
|
||||
|
||||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
{
|
||||
if(eflag) cuda->cu_eng_vdwl->download();
|
||||
if(eflag) cuda->cu_eng_coul->download();
|
||||
if(vflag) cuda->cu_virial->download();
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCharmmCoulLongCuda::settings(int narg, char **arg)
|
||||
{
|
||||
PairLJCharmmCoulLong::settings(narg, arg);
|
||||
cuda->shared_data.pair.cut_global = (X_FLOAT) cut_lj;
|
||||
cuda->shared_data.pair.cut_coulsq_global = (X_FLOAT) cut_coulsq;
|
||||
cuda->shared_data.pair.cut_inner_global = (F_FLOAT) cut_lj_inner;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCharmmCoulLongCuda::coeff(int narg, char **arg)
|
||||
{
|
||||
PairLJCharmmCoulLong::coeff(narg, arg);
|
||||
allocate();
|
||||
}
|
||||
|
||||
void PairLJCharmmCoulLongCuda::init_style()
|
||||
{
|
||||
if (!atom->q_flag)
|
||||
error->all("Pair style lj/charmm/coul/long requires atom attribute q");
|
||||
// request regular or rRESPA neighbor lists
|
||||
|
||||
int irequest;
|
||||
|
||||
|
||||
irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->cudable = 1;
|
||||
|
||||
if (cut_lj_inner >= cut_lj)
|
||||
error->all("Pair inner cutoff >= Pair outer cutoff");
|
||||
|
||||
cut_lj_innersq = cut_lj_inner * cut_lj_inner;
|
||||
cut_ljsq = cut_lj * cut_lj;
|
||||
cut_coulsq = cut_coul * cut_coul;
|
||||
cut_bothsq = MAX(cut_ljsq,cut_coulsq);
|
||||
|
||||
denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) *
|
||||
(cut_ljsq-cut_lj_innersq);
|
||||
|
||||
cut_coulsq = cut_coul * cut_coul;
|
||||
cuda->shared_data.pair.cut_coulsq_global=cut_coulsq;
|
||||
|
||||
if (force->kspace == NULL)
|
||||
error->all("Pair style is incompatible with KSpace style");
|
||||
g_ewald = force->kspace->g_ewald;
|
||||
cuda->shared_data.pair.g_ewald=g_ewald;
|
||||
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
|
||||
|
||||
|
||||
if(ncoultablebits) error->warning("# CUDA: You asked for the useage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n");
|
||||
}
|
||||
|
||||
void PairLJCharmmCoulLongCuda::init_list(int id, NeighList *ptr)
|
||||
{
|
||||
MYDBG(printf("# CUDA PairLJCharmmCoulLongCuda::init_list\n");)
|
||||
PairLJCharmmCoulLong::init_list(id, ptr);
|
||||
#ifndef CUDA_USE_BINNING
|
||||
// right now we can only handle verlet (id 0), not respa
|
||||
if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
|
||||
// see Neighbor::init() for details on lammps lists' logic
|
||||
#endif
|
||||
MYDBG(printf("# CUDA PairLJCharmmCoulLongCuda::init_list end\n");)
|
||||
}
|
||||
|
||||
void PairLJCharmmCoulLongCuda::ev_setup(int eflag, int vflag)
|
||||
{
|
||||
int maxeatomold=maxeatom;
|
||||
PairLJCharmmCoulLong::ev_setup(eflag,vflag);
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(lj/charmm/coul/long/cuda,PairLJCharmmCoulLongCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_LJ_CHARMM_COUL_LONG_CUDA_H
|
||||
#define LMP_PAIR_LJ_CHARMM_COUL_LONG_CUDA_H
|
||||
|
||||
#include "pair_lj_charmm_coul_long.h"
|
||||
#include "cuda_data.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairLJCharmmCoulLongCuda : public PairLJCharmmCoulLong
|
||||
{
|
||||
public:
|
||||
PairLJCharmmCoulLongCuda(class LAMMPS *);
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_list(int, class NeighList *);
|
||||
void init_style();
|
||||
void ev_setup(int eflag, int vflag);
|
||||
protected:
|
||||
class Cuda *cuda;
|
||||
void allocate();
|
||||
bool allocated2;
|
||||
class CudaNeighList* cuda_neigh_list;
|
||||
cCudaData<double , F_FLOAT , x >* cu_lj1_gm;
|
||||
cCudaData<double , F_FLOAT , x >* cu_lj2_gm;
|
||||
cCudaData<double , F_FLOAT , x >* cu_lj3_gm;
|
||||
cCudaData<double , F_FLOAT , x >* cu_lj4_gm;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,167 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
Contributing author: Paul Crozier (SNL)
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "pair_lj_class2_coul_cut_cuda.h"
|
||||
#include "pair_lj_class2_coul_cut_cuda_cu.h"
|
||||
#include "cuda_data.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "kspace.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "cuda_neigh_list.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Construct the pair style on top of PairLJClass2CoulCut and attach it to the
// Cuda object stored in the LAMMPS instance.
//
// lmp: owning LAMMPS instance; lmp->cuda must be non-NULL, otherwise
//      error->all() is called with an explanatory message.
//
// cuda_neigh_list is set to NULL here because it is only assigned later in
// init_list(), while compute() reads it; without this it would hold an
// indeterminate value until init_list() has run.
PairLJClass2CoulCutCuda::PairLJClass2CoulCutCuda(LAMMPS *lmp) :
  PairLJClass2CoulCut(lmp),
  cuda(lmp->cuda),
  allocated2(false),
  cuda_neigh_list(NULL)
{
  // NOTE(review): error->all() is assumed not to return; cuda is dereferenced below
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remember pointer to arrays in cuda shared data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairLJClass2CoulCutCuda::allocate()
|
||||
{
|
||||
if(! allocated) PairLJClass2CoulCut::allocate();
|
||||
if(! allocated2)
|
||||
{
|
||||
allocated2 = true;
|
||||
cuda->shared_data.pair.cut = cut_lj;
|
||||
cuda->shared_data.pair.cut_coul= cut_coul;
|
||||
cuda->shared_data.pair.coeff1 = lj1;
|
||||
cuda->shared_data.pair.coeff2 = lj2;
|
||||
cuda->shared_data.pair.coeff3 = lj3;
|
||||
cuda->shared_data.pair.coeff4 = lj4;
|
||||
cuda->shared_data.pair.offset = offset;
|
||||
cuda->shared_data.pair.special_lj = force->special_lj;
|
||||
cuda->shared_data.pair.special_coul = force->special_coul;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJClass2CoulCutCuda::compute(int eflag, int vflag)
|
||||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
if(eflag) cuda->cu_eng_vdwl->upload();
|
||||
if(eflag) cuda->cu_eng_coul->upload();
|
||||
if(vflag) cuda->cu_virial->upload();
|
||||
|
||||
Cuda_PairLJClass2CoulCutCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
|
||||
|
||||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
{
|
||||
if(eflag) cuda->cu_eng_vdwl->download();
|
||||
if(eflag) cuda->cu_eng_coul->download();
|
||||
if(vflag) cuda->cu_virial->download();
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJClass2CoulCutCuda::settings(int narg, char **arg)
|
||||
{
|
||||
PairLJClass2CoulCut::settings(narg, arg);
|
||||
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
|
||||
cuda->shared_data.pair.cut_coul_global = (F_FLOAT) cut_coul_global;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJClass2CoulCutCuda::coeff(int narg, char **arg)
|
||||
{
|
||||
PairLJClass2CoulCut::coeff(narg, arg);
|
||||
allocate();
|
||||
}
|
||||
|
||||
void PairLJClass2CoulCutCuda::init_style()
|
||||
{
|
||||
if (!atom->q_flag)
|
||||
error->all("Pair style lj/cut/coul/cut/cuda requires atom attribute q");
|
||||
// request regular or rRESPA neighbor lists
|
||||
|
||||
int irequest;
|
||||
|
||||
|
||||
irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->cudable = 1;
|
||||
|
||||
|
||||
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
|
||||
|
||||
}
|
||||
|
||||
void PairLJClass2CoulCutCuda::init_list(int id, NeighList *ptr)
|
||||
{
|
||||
MYDBG(printf("# CUDA PairLJClass2CoulCutCuda::init_list\n");)
|
||||
PairLJClass2CoulCut::init_list(id, ptr);
|
||||
#ifndef CUDA_USE_BINNING
|
||||
// right now we can only handle verlet (id 0), not respa
|
||||
if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
|
||||
// see Neighbor::init() for details on lammps lists' logic
|
||||
#endif
|
||||
MYDBG(printf("# CUDA PairLJClass2CoulCutCuda::init_list end\n");)
|
||||
}
|
||||
|
||||
void PairLJClass2CoulCutCuda::ev_setup(int eflag, int vflag)
|
||||
{
|
||||
int maxeatomold=maxeatom;
|
||||
PairLJClass2CoulCut::ev_setup(eflag,vflag);
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(lj/class2/coul/cut/cuda,PairLJClass2CoulCutCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_LJ_CLASS2_COUL_CUT_CUDA_H
|
||||
#define LMP_PAIR_LJ_CLASS2_COUL_CUT_CUDA_H
|
||||
|
||||
#include "pair_lj_class2_coul_cut.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairLJClass2CoulCutCuda : public PairLJClass2CoulCut
|
||||
{
|
||||
public:
|
||||
PairLJClass2CoulCutCuda(class LAMMPS *);
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_list(int, class NeighList *);
|
||||
void init_style();
|
||||
void ev_setup(int eflag, int vflag);
|
||||
protected:
|
||||
class Cuda *cuda;
|
||||
void allocate();
|
||||
bool allocated2;
|
||||
class CudaNeighList* cuda_neigh_list;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,180 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
Contributing author: Paul Crozier (SNL)
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "pair_lj_class2_coul_long_cuda.h"
|
||||
#include "pair_lj_class2_coul_long_cuda_cu.h"
|
||||
#include "cuda_data.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "kspace.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "cuda_neigh_list.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
#define EWALD_F 1.12837917
|
||||
#define EWALD_P 0.3275911
|
||||
#define A1 0.254829592
|
||||
#define A2 -0.284496736
|
||||
#define A3 1.421413741
|
||||
#define A4 -1.453152027
|
||||
#define A5 1.061405429
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Construct the pair style on top of PairLJClass2CoulLong and attach it to
// the Cuda object stored in the LAMMPS instance.
//
// lmp: owning LAMMPS instance; lmp->cuda must be non-NULL, otherwise
//      error->all() is called with an explanatory message.
//
// cuda_neigh_list is set to NULL here because it is only assigned later in
// init_list(), while compute() reads it; without this it would hold an
// indeterminate value until init_list() has run.
PairLJClass2CoulLongCuda::PairLJClass2CoulLongCuda(LAMMPS *lmp) :
  PairLJClass2CoulLong(lmp),
  cuda(lmp->cuda),
  allocated2(false),
  cuda_neigh_list(NULL)
{
  // NOTE(review): error->all() is assumed not to return; cuda is dereferenced below
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remember pointer to arrays in cuda shared data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairLJClass2CoulLongCuda::allocate()
|
||||
{
|
||||
if(! allocated) PairLJClass2CoulLong::allocate();
|
||||
if(! allocated2)
|
||||
{
|
||||
allocated2 = true;
|
||||
cuda->shared_data.pair.cut = cut_lj;
|
||||
cuda->shared_data.pair.coeff1 = lj1;
|
||||
cuda->shared_data.pair.coeff2 = lj2;
|
||||
cuda->shared_data.pair.coeff3 = lj3;
|
||||
cuda->shared_data.pair.coeff4 = lj4;
|
||||
cuda->shared_data.pair.offset = offset;
|
||||
cuda->shared_data.pair.special_lj = force->special_lj;
|
||||
cuda->shared_data.pair.special_coul = force->special_coul;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJClass2CoulLongCuda::compute(int eflag, int vflag)
|
||||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
if(eflag) cuda->cu_eng_vdwl->upload();
|
||||
if(eflag) cuda->cu_eng_coul->upload();
|
||||
if(vflag) cuda->cu_virial->upload();
|
||||
|
||||
Cuda_PairLJClass2CoulLongCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
|
||||
|
||||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
{
|
||||
if(eflag) cuda->cu_eng_vdwl->download();
|
||||
if(eflag) cuda->cu_eng_coul->download();
|
||||
if(vflag) cuda->cu_virial->download();
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJClass2CoulLongCuda::settings(int narg, char **arg)
|
||||
{
|
||||
PairLJClass2CoulLong::settings(narg, arg);
|
||||
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJClass2CoulLongCuda::coeff(int narg, char **arg)
|
||||
{
|
||||
PairLJClass2CoulLong::coeff(narg, arg);
|
||||
allocate();
|
||||
}
|
||||
|
||||
void PairLJClass2CoulLongCuda::init_style()
|
||||
{
|
||||
if (!atom->q_flag)
|
||||
error->all("Pair style lj/cut/coul/long requires atom attribute q");
|
||||
// request regular or rRESPA neighbor lists
|
||||
|
||||
int irequest;
|
||||
|
||||
|
||||
irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->cudable = 1;
|
||||
|
||||
cut_coulsq = cut_coul * cut_coul;
|
||||
cuda->shared_data.pair.cut_coul_global=cut_coul;
|
||||
cuda->shared_data.pair.cut_coulsq_global=cut_coulsq;
|
||||
// set rRESPA cutoffs
|
||||
|
||||
if (force->newton) error->warning("Pair style uses does not use \"newton\" setting. You might test if \"newton off\" makes the simulation run faster.");
|
||||
if (force->kspace == NULL)
|
||||
error->all("Pair style is incompatible with KSpace style");
|
||||
g_ewald = force->kspace->g_ewald;
|
||||
cuda->shared_data.pair.g_ewald=g_ewald;
|
||||
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
|
||||
|
||||
|
||||
if(ncoultablebits) error->warning("# CUDA: You asked for the useage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n");
|
||||
}
|
||||
|
||||
void PairLJClass2CoulLongCuda::init_list(int id, NeighList *ptr)
|
||||
{
|
||||
MYDBG(printf("# CUDA PairLJClass2CoulLongCuda::init_list\n");)
|
||||
PairLJClass2CoulLong::init_list(id, ptr);
|
||||
if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
|
||||
// see Neighbor::init() for details on lammps lists' logic
|
||||
MYDBG(printf("# CUDA PairLJClass2CoulLongCuda::init_list end\n");)
|
||||
}
|
||||
|
||||
void PairLJClass2CoulLongCuda::ev_setup(int eflag, int vflag)
|
||||
{
|
||||
int maxeatomold=maxeatom;
|
||||
PairLJClass2CoulLong::ev_setup(eflag,vflag);
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(lj/class2/coul/long/cuda,PairLJClass2CoulLongCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_LJ_CLASS2_COUL_LONG_CUDA_H
|
||||
#define LMP_PAIR_LJ_CLASS2_COUL_LONG_CUDA_H
|
||||
|
||||
#include "pair_lj_class2_coul_long.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairLJClass2CoulLongCuda : public PairLJClass2CoulLong
|
||||
{
|
||||
public:
|
||||
PairLJClass2CoulLongCuda(class LAMMPS *);
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_list(int, class NeighList *);
|
||||
void init_style();
|
||||
void ev_setup(int eflag, int vflag);
|
||||
protected:
|
||||
class Cuda *cuda;
|
||||
void allocate();
|
||||
bool allocated2;
|
||||
class CudaNeighList* cuda_neigh_list;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,172 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Paul Crozier (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "pair_lj_class2_cuda.h"
|
||||
#include "pair_lj_class2_cuda_cu.h"
|
||||
#include "cuda_data.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "cuda_neigh_list.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Construct the pair style on top of PairLJClass2 and attach it to the Cuda
// object stored in the LAMMPS instance.
//
// lmp: owning LAMMPS instance; lmp->cuda must be non-NULL, otherwise
//      error->all() is called with an explanatory message.
//
// cuda_neigh_list is set to NULL here because it is only assigned later in
// init_list(), while compute() reads it; without this it would hold an
// indeterminate value until init_list() has run.
PairLJClass2Cuda::PairLJClass2Cuda(LAMMPS *lmp) :
  PairLJClass2(lmp),
  cuda(lmp->cuda),
  allocated2(false),
  cuda_neigh_list(NULL)
{
  // NOTE(review): error->all() is assumed not to return; cuda is dereferenced below
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remember pointer to arrays in cuda shared data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairLJClass2Cuda::allocate()
|
||||
{
|
||||
if(! allocated) PairLJClass2::allocate();
|
||||
if(! allocated2)
|
||||
{
|
||||
allocated2 = true;
|
||||
cuda->shared_data.pair.cut = cut;
|
||||
cuda->shared_data.pair.coeff1 = lj1;
|
||||
cuda->shared_data.pair.coeff2 = lj2;
|
||||
cuda->shared_data.pair.coeff3 = lj3;
|
||||
cuda->shared_data.pair.coeff4 = lj4;
|
||||
cuda->shared_data.pair.offset = offset;
|
||||
cuda->shared_data.pair.special_lj = force->special_lj;
|
||||
cuda->shared_data.pair.special_coul = force->special_coul;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJClass2Cuda::compute(int eflag, int vflag)
|
||||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
if(eflag) cuda->cu_eng_vdwl->upload();
|
||||
if(vflag) cuda->cu_virial->upload();
|
||||
|
||||
Cuda_PairLJClass2Cuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
|
||||
|
||||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
{
|
||||
if(eflag) cuda->cu_eng_vdwl->download();
|
||||
if(vflag) cuda->cu_virial->download();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJClass2Cuda::settings(int narg, char **arg)
|
||||
{
|
||||
PairLJClass2::settings(narg, arg);
|
||||
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_global;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJClass2Cuda::coeff(int narg, char **arg)
|
||||
{
|
||||
PairLJClass2::coeff(narg, arg);
|
||||
allocate();
|
||||
}
|
||||
|
||||
void PairLJClass2Cuda::init_style()
|
||||
{
|
||||
MYDBG(printf("# CUDA PairLJClass2Cuda::init_style start\n"); )
|
||||
// request regular or rRESPA neighbor lists
|
||||
|
||||
int irequest;
|
||||
|
||||
irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->cudable = 1;
|
||||
//neighbor->style=0; //0=NSQ neighboring
|
||||
MYDBG(printf("# CUDA PairLJClass2Cuda::init_style end\n"); )
|
||||
}
|
||||
|
||||
void PairLJClass2Cuda::init_list(int id, NeighList *ptr)
|
||||
{
|
||||
MYDBG(printf("# CUDA PairLJClass2Cuda::init_list\n");)
|
||||
PairLJClass2::init_list(id, ptr);
|
||||
if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
|
||||
// see Neighbor::init() for details on lammps lists' logic
|
||||
MYDBG(printf("# CUDA PairLJClass2Cuda::init_list end\n");)
|
||||
}
|
||||
|
||||
void PairLJClass2Cuda::ev_setup(int eflag, int vflag)
|
||||
{
|
||||
int maxeatomold=maxeatom;
|
||||
PairLJClass2::ev_setup(eflag,vflag);
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(lj/class2/cuda,PairLJClass2Cuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef PAIR_LJ_CLASS2_CUDA_H
|
||||
#define PAIR_LJ_CLASS2_CUDA_H
|
||||
|
||||
#include "pair_lj_class2.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairLJClass2Cuda : public PairLJClass2
|
||||
{
|
||||
public:
|
||||
PairLJClass2Cuda(class LAMMPS *);
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_list(int, class NeighList *);
|
||||
void init_style();
|
||||
void ev_setup(int eflag, int vflag);
|
||||
protected:
|
||||
class Cuda *cuda;
|
||||
void allocate();
|
||||
bool allocated2;
|
||||
class CudaNeighList* cuda_neigh_list;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,167 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
Contributing author: Paul Crozier (SNL)
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "pair_lj_cut_coul_cut_cuda.h"
|
||||
#include "pair_lj_cut_coul_cut_cuda_cu.h"
|
||||
#include "cuda_data.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "kspace.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "cuda_neigh_list.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Construct the pair style on top of PairLJCutCoulCut and attach it to the
// Cuda object stored in the LAMMPS instance.
//
// lmp: owning LAMMPS instance; lmp->cuda must be non-NULL, otherwise
//      error->all() is called with an explanatory message.
//
// cuda_neigh_list is set to NULL here because it is only assigned later in
// init_list(), while compute() reads it; without this it would hold an
// indeterminate value until init_list() has run.
PairLJCutCoulCutCuda::PairLJCutCoulCutCuda(LAMMPS *lmp) :
  PairLJCutCoulCut(lmp),
  cuda(lmp->cuda),
  allocated2(false),
  cuda_neigh_list(NULL)
{
  // NOTE(review): error->all() is assumed not to return; cuda is dereferenced below
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remember pointer to arrays in cuda shared data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCutCoulCutCuda::allocate()
|
||||
{
|
||||
if(! allocated) PairLJCutCoulCut::allocate();
|
||||
if(! allocated2)
|
||||
{
|
||||
allocated2 = true;
|
||||
cuda->shared_data.pair.cut = cut_lj;
|
||||
cuda->shared_data.pair.cut_coul= cut_coul;
|
||||
cuda->shared_data.pair.coeff1 = lj1;
|
||||
cuda->shared_data.pair.coeff2 = lj2;
|
||||
cuda->shared_data.pair.coeff3 = lj3;
|
||||
cuda->shared_data.pair.coeff4 = lj4;
|
||||
cuda->shared_data.pair.offset = offset;
|
||||
cuda->shared_data.pair.special_lj = force->special_lj;
|
||||
cuda->shared_data.pair.special_coul = force->special_coul;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCutCoulCutCuda::compute(int eflag, int vflag)
|
||||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
if(eflag) cuda->cu_eng_vdwl->upload();
|
||||
if(eflag) cuda->cu_eng_coul->upload();
|
||||
if(vflag) cuda->cu_virial->upload();
|
||||
|
||||
Cuda_PairLJCutCoulCutCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
|
||||
|
||||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
{
|
||||
if(eflag) cuda->cu_eng_vdwl->download();
|
||||
if(eflag) cuda->cu_eng_coul->download();
|
||||
if(vflag) cuda->cu_virial->download();
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCutCoulCutCuda::settings(int narg, char **arg)
|
||||
{
|
||||
PairLJCutCoulCut::settings(narg, arg);
|
||||
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
|
||||
cuda->shared_data.pair.cut_coul_global = (F_FLOAT) cut_coul_global;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCutCoulCutCuda::coeff(int narg, char **arg)
|
||||
{
|
||||
PairLJCutCoulCut::coeff(narg, arg);
|
||||
allocate();
|
||||
}
|
||||
|
||||
void PairLJCutCoulCutCuda::init_style()
|
||||
{
|
||||
if (!atom->q_flag)
|
||||
error->all("Pair style lj/cut/coul/cut/cuda requires atom attribute q");
|
||||
// request regular or rRESPA neighbor lists
|
||||
|
||||
int irequest;
|
||||
|
||||
|
||||
irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->cudable = 1;
|
||||
|
||||
|
||||
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
|
||||
|
||||
}
|
||||
|
||||
void PairLJCutCoulCutCuda::init_list(int id, NeighList *ptr)
|
||||
{
|
||||
MYDBG(printf("# CUDA PairLJCutCoulCutCuda::init_list\n");)
|
||||
PairLJCutCoulCut::init_list(id, ptr);
|
||||
#ifndef CUDA_USE_BINNING
|
||||
// right now we can only handle verlet (id 0), not respa
|
||||
if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
|
||||
// see Neighbor::init() for details on lammps lists' logic
|
||||
#endif
|
||||
MYDBG(printf("# CUDA PairLJCutCoulCutCuda::init_list end\n");)
|
||||
}
|
||||
|
||||
void PairLJCutCoulCutCuda::ev_setup(int eflag, int vflag)
|
||||
{
|
||||
int maxeatomold=maxeatom;
|
||||
PairLJCutCoulCut::ev_setup(eflag,vflag);
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(lj/cut/coul/cut/cuda,PairLJCutCoulCutCuda)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_LJ_CUT_COUL_CUT_CUDA_H
|
||||
#define LMP_PAIR_LJ_CUT_COUL_CUT_CUDA_H
|
||||
|
||||
#include "pair_lj_cut_coul_cut.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairLJCutCoulCutCuda : public PairLJCutCoulCut
|
||||
{
|
||||
public:
|
||||
PairLJCutCoulCutCuda(class LAMMPS *);
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_list(int, class NeighList *);
|
||||
void init_style();
|
||||
void ev_setup(int eflag, int vflag);
|
||||
protected:
|
||||
class Cuda *cuda;
|
||||
void allocate();
|
||||
bool allocated2;
|
||||
class CudaNeighList* cuda_neigh_list;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,168 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
|
||||
Original Version:
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
|
||||
Contributing author: Paul Crozier (SNL)
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
USER-CUDA Package and associated modifications:
|
||||
https://sourceforge.net/projects/lammpscuda/
|
||||
|
||||
Christian Trott, christian.trott@tu-ilmenau.de
|
||||
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
|
||||
Theoretical Physics II, University of Technology Ilmenau, Germany
|
||||
|
||||
See the README file in the USER-CUDA directory.
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "pair_lj_cut_coul_debye_cuda.h"
|
||||
#include "pair_lj_cut_coul_debye_cuda_cu.h"
|
||||
#include "cuda_data.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "kspace.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "cuda_neigh_list.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "cuda.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
// smaller of two values; being a function-like macro it may evaluate an argument twice
#define MIN(x,y) ((x) < (y) ? (x) : (y))
|
||||
// larger of two values; being a function-like macro it may evaluate an argument twice
#define MAX(x,y) ((x) > (y) ? (x) : (y))
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
   constructor: take the Cuda object from the LAMMPS instance, refuse to
   run without it, reset this class's own state and flag the pair force
   as cudable in the shared data
------------------------------------------------------------------------- */
PairLJCutCoulDebyeCuda::PairLJCutCoulDebyeCuda(LAMMPS *lmp) : PairLJCutCoulDebye(lmp)
{
  cuda = lmp->cuda;
  // NOTE(review): the dereferences below rely on error->all() not returning
  if (cuda == NULL)
    error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Use no '-a' command line argument, or '-a cuda'.");

  allocated2 = false;

  // init_list() assigns this pointer only for list id 0 and only when
  // CUDA_USE_BINNING is not defined; give it a defined value until then
  // instead of leaving it uninitialized
  cuda_neigh_list = NULL;

  cuda->shared_data.pair.cudable_force = 1;
  cuda->setSystemParams();
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
remember pointer to arrays in cuda shared data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCutCoulDebyeCuda::allocate()
|
||||
{
|
||||
if(! allocated) PairLJCutCoulDebye::allocate();
|
||||
if(! allocated2)
|
||||
{
|
||||
allocated2 = true;
|
||||
cuda->shared_data.pair.cut = cut_lj;
|
||||
cuda->shared_data.pair.cut_coul= cut_coul;
|
||||
cuda->shared_data.pair.coeff1 = lj1;
|
||||
cuda->shared_data.pair.coeff2 = lj2;
|
||||
cuda->shared_data.pair.coeff3 = lj3;
|
||||
cuda->shared_data.pair.coeff4 = lj4;
|
||||
cuda->shared_data.pair.offset = offset;
|
||||
cuda->shared_data.pair.special_lj = force->special_lj;
|
||||
cuda->shared_data.pair.special_coul = force->special_coul;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCutCoulDebyeCuda::compute(int eflag, int vflag)
|
||||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
if(eflag) cuda->cu_eng_vdwl->upload();
|
||||
if(eflag) cuda->cu_eng_coul->upload();
|
||||
if(vflag) cuda->cu_virial->upload();
|
||||
|
||||
Cuda_PairLJCutCoulDebyeCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
|
||||
|
||||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
{
|
||||
if(eflag) cuda->cu_eng_vdwl->download();
|
||||
if(eflag) cuda->cu_eng_coul->download();
|
||||
if(vflag) cuda->cu_virial->download();
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCutCoulDebyeCuda::settings(int narg, char **arg)
|
||||
{
|
||||
PairLJCutCoulDebye::settings(narg, arg);
|
||||
cuda->shared_data.pair.cut_global = (F_FLOAT) cut_lj_global;
|
||||
cuda->shared_data.pair.cut_coul_global = (F_FLOAT) cut_coul_global;
|
||||
cuda->shared_data.pair.kappa = (F_FLOAT) kappa;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCutCoulDebyeCuda::coeff(int narg, char **arg)
|
||||
{
|
||||
PairLJCutCoulDebye::coeff(narg, arg);
|
||||
allocate();
|
||||
}
|
||||
|
||||
void PairLJCutCoulDebyeCuda::init_style()
|
||||
{
|
||||
if (!atom->q_flag)
|
||||
error->all("Pair style lj/cut/coul/debye/cuda requires atom attribute q");
|
||||
// request regular or rRESPA neighbor lists
|
||||
|
||||
int irequest;
|
||||
|
||||
|
||||
irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->cudable = 1;
|
||||
|
||||
|
||||
cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
|
||||
|
||||
}
|
||||
|
||||
void PairLJCutCoulDebyeCuda::init_list(int id, NeighList *ptr)
|
||||
{
|
||||
MYDBG(printf("# CUDA PairLJCutCoulDebyeCuda::init_list\n");)
|
||||
PairLJCutCoulDebye::init_list(id, ptr);
|
||||
#ifndef CUDA_USE_BINNING
|
||||
// right now we can only handle verlet (id 0), not respa
|
||||
if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
|
||||
// see Neighbor::init() for details on lammps lists' logic
|
||||
#endif
|
||||
MYDBG(printf("# CUDA PairLJCutCoulDebyeCuda::init_list end\n");)
|
||||
}
|
||||
|
||||
void PairLJCutCoulDebyeCuda::ev_setup(int eflag, int vflag)
|
||||
{
|
||||
int maxeatomold=maxeatom;
|
||||
PairLJCutCoulDebye::ev_setup(eflag,vflag);
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax );}
|
||||
|
||||
if (eflag_atom && atom->nmax > maxeatomold)
|
||||
{delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6 );}
|
||||
|
||||
}
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue