forked from lijiext/lammps
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@13906 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
c85a42ab6d
commit
21f8abda24
|
@ -21,6 +21,8 @@ awpmd antisymmetrized wave packet molecular dynamics, AWPMD package
|
|||
from Ilya Valuev (JIHT RAS)
|
||||
colvars collective variable module (Metadynamics, ABF and more)
|
||||
from Giacomo Fiorin and Jerome Henin (ICMS, Temple U)
|
||||
compress hook to system lib for performing I/O compression, COMPRESS pkg
|
||||
from Axel Kohlmeyer (Temple U)
|
||||
cuda NVIDIA GPU routines, USER-CUDA package
|
||||
from Christian Trott (U Tech Ilmenau)
|
||||
gpu general GPU routines, GPU package
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
# This file contains the settings to build and link LAMMPS with
|
||||
# support for data compression libraries.
|
||||
#
|
||||
# When you build LAMMPS with the COMPRESS package installed, it will
|
||||
# use the 3 settings in this file. They should be set as follows.
|
||||
#
|
||||
# The compress_SYSLIB setting is for linking the compression library.
|
||||
# By default, the setting will point to zlib (-lz).
|
||||
#
|
||||
# The compress_SYSINC and compress_SYSPATH variables do not typically need
|
||||
# to be set, as compression libraries are usually installed as packages
|
||||
# in system locations. Otherwise, specify its directory via the
|
||||
# compress_SYSPATH variable (e.g. -Ldir) or the compress_SYSINC variable (e.g. -Idir).
|
||||
|
||||
# -----------------------------------------------------------
|
||||
|
||||
# Settings that the LAMMPS build will import when this package is installed
|
||||
|
||||
compress_SYSINC =
|
||||
compress_SYSLIB = -lz
|
||||
compress_SYSPATH =
|
|
@ -68,8 +68,11 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
|
|||
$(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
|
||||
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
|
||||
$(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \
|
||||
$(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \
|
||||
$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o
|
||||
$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \
|
||||
$(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_cubic.o $(OBJ_DIR)/lal_lj_cubic_ext.o
|
||||
|
||||
CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
|
||||
$(OBJ_DIR)/atom.cubin $(OBJ_DIR)/atom_cubin.h \
|
||||
|
@ -117,8 +120,12 @@ CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
|
|||
$(OBJ_DIR)/lj_coul_msm.cubin $(OBJ_DIR)/lj_coul_msm_cubin.h \
|
||||
$(OBJ_DIR)/lj_gromacs.cubin $(OBJ_DIR)/lj_gromacs_cubin.h \
|
||||
$(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd_cubin.h \
|
||||
$(OBJ_DIR)/coul.cubin $(OBJ_DIR)/coul_cubin.h \
|
||||
$(OBJ_DIR)/coul_debye.cubin $(OBJ_DIR)/coul_debye_cubin.h
|
||||
$(OBJ_DIR)/tersoff.cubin $(OBJ_DIR)/tersoff_cubin.h \
|
||||
$(OBJ_DIR)/coul.cubin $(OBJ_DIR)/coul_cubin.h \
|
||||
$(OBJ_DIR)/coul_debye.cubin $(OBJ_DIR)/coul_debye_cubin.h \
|
||||
$(OBJ_DIR)/zbl.cubin $(OBJ_DIR)/zbl_cubin.h \
|
||||
$(OBJ_DIR)/lj_cubic.cubin $(OBJ_DIR)/lj_cubic_cubin.h
|
||||
|
||||
|
||||
all: $(OBJ_DIR) $(GPU_LIB) $(EXECS)
|
||||
|
||||
|
@ -680,6 +687,18 @@ $(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cubin.h $(OB
|
|||
$(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h
|
||||
$(CUDR) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/tersoff.cubin: lal_tersoff.cu lal_precision.h lal_tersoff_extra.h lal_preprocessor.h
|
||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_tersoff.cu
|
||||
|
||||
$(OBJ_DIR)/tersoff_cubin.h: $(OBJ_DIR)/tersoff.cubin $(OBJ_DIR)/tersoff.cubin
|
||||
$(BIN2C) -c -n tersoff $(OBJ_DIR)/tersoff.cubin > $(OBJ_DIR)/tersoff_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_tersoff.o: $(ALL_H) lal_tersoff.h lal_tersoff.cpp $(OBJ_DIR)/tersoff_cubin.h $(OBJ_DIR)/lal_base_three.o
|
||||
$(CUDR) -o $@ -c lal_tersoff.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_tersoff_ext.o: $(ALL_H) lal_tersoff.h lal_tersoff_ext.cpp lal_base_three.h
|
||||
$(CUDR) -o $@ -c lal_tersoff_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/coul.cubin: lal_coul.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_coul.cu
|
||||
|
||||
|
@ -704,6 +723,30 @@ $(OBJ_DIR)/lal_coul_debye.o: $(ALL_H) lal_coul_debye.h lal_coul_debye.cpp $(OBJ_
|
|||
$(OBJ_DIR)/lal_coul_debye_ext.o: $(ALL_H) lal_coul_debye.h lal_coul_debye_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_coul_debye_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/zbl.cubin: lal_zbl.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_zbl.cu
|
||||
|
||||
$(OBJ_DIR)/zbl_cubin.h: $(OBJ_DIR)/zbl.cubin $(OBJ_DIR)/zbl.cubin
|
||||
$(BIN2C) -c -n zbl $(OBJ_DIR)/zbl.cubin > $(OBJ_DIR)/zbl_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_zbl.o: $(ALL_H) lal_zbl.h lal_zbl.cpp $(OBJ_DIR)/zbl_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_zbl.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_zbl_ext.o: $(ALL_H) lal_zbl.h lal_zbl_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_zbl_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_cubic.cubin: lal_lj_cubic.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_cubic.cu
|
||||
|
||||
$(OBJ_DIR)/lj_cubic_cubin.h: $(OBJ_DIR)/lj_cubic.cubin $(OBJ_DIR)/lj_cubic.cubin
|
||||
$(BIN2C) -c -n lj_cubic $(OBJ_DIR)/lj_cubic.cubin > $(OBJ_DIR)/lj_cubic_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_lj_cubic.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic.cpp $(OBJ_DIR)/lj_cubic_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_lj_cubic.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_lj_cubic_ext.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_lj_cubic_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
|
||||
$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda
|
||||
|
||||
|
|
|
@ -57,8 +57,11 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_answer.o \
|
|||
$(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
|
||||
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
|
||||
$(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \
|
||||
$(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \
|
||||
$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o
|
||||
$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \
|
||||
$(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_cubic.o $(OBJ_DIR)/lal_lj_cubic_ext.o
|
||||
|
||||
KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
|
||||
$(OBJ_DIR)/neighbor_cpu_cl.h $(OBJ_DIR)/pppm_cl.h \
|
||||
|
@ -82,7 +85,10 @@ KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
|
|||
$(OBJ_DIR)/sw_cl.h $(OBJ_DIR)/beck_cl.h $(OBJ_DIR)/mie_cl.h \
|
||||
$(OBJ_DIR)/soft_cl.h $(OBJ_DIR)/lj_coul_msm_cl.h \
|
||||
$(OBJ_DIR)/lj_gromacs_cl.h $(OBJ_DIR)/dpd_cl.h \
|
||||
$(OBJ_DIR)/coul_cl.h $(OBJ_DIR)/coul_debye_cl.h
|
||||
$(OBJ_DIR)/lj_gauss_cl.h $(OBJ_DIR)/dzugutov_cl.h \
|
||||
$(OBJ_DIR)/tersoff_cl.h $(OBJ_DIR)/coul_cl.h \
|
||||
$(OBJ_DIR)/coul_debye_cl.h $(OBJ_DIR)/zbl_cl.h \
|
||||
$(OBJ_DIR)/lj_cubic_cl.h
|
||||
|
||||
|
||||
OCL_EXECS = $(BIN_DIR)/ocl_get_devices
|
||||
|
@ -488,6 +494,15 @@ $(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cl.h $(OBJ_
|
|||
$(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h
|
||||
$(OCL) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/tersoff_cl.h: lal_tersoff.cu lal_tersoff_extra.h $(PRE1_H)
|
||||
$(BSH) ./geryon/file_to_cstr.sh tersoff $(PRE1_H) lal_tersoff_extra.h lal_tersoff.cu $(OBJ_DIR)/tersoff_cl.h;
|
||||
|
||||
$(OBJ_DIR)/lal_tersoff.o: $(ALL_H) lal_tersoff.h lal_tersoff.cpp $(OBJ_DIR)/tersoff_cl.h $(OBJ_DIR)/tersoff_cl.h $(OBJ_DIR)/lal_base_three.o
|
||||
$(OCL) -o $@ -c lal_tersoff.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_tersoff_ext.o: $(ALL_H) lal_tersoff.h lal_tersoff_ext.cpp lal_base_three.h
|
||||
$(OCL) -o $@ -c lal_tersoff_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/coul_cl.h: lal_coul.cu $(PRE1_H)
|
||||
$(BSH) ./geryon/file_to_cstr.sh coul $(PRE1_H) lal_coul.cu $(OBJ_DIR)/coul_cl.h;
|
||||
|
||||
|
@ -506,6 +521,24 @@ $(OBJ_DIR)/lal_coul_debye.o: $(ALL_H) lal_coul_debye.h lal_coul_debye.cpp $(OBJ
|
|||
$(OBJ_DIR)/lal_coul_debye_ext.o: $(ALL_H) lal_coul_debye.h lal_coul_debye_ext.cpp lal_base_charge.h
|
||||
$(OCL) -o $@ -c lal_coul_debye_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/zbl_cl.h: lal_zbl.cu $(PRE1_H)
|
||||
$(BSH) ./geryon/file_to_cstr.sh zbl $(PRE1_H) lal_zbl.cu $(OBJ_DIR)/zbl_cl.h;
|
||||
|
||||
$(OBJ_DIR)/lal_zbl.o: $(ALL_H) lal_zbl.h lal_zbl.cpp $(OBJ_DIR)/zbl_cl.h $(OBJ_DIR)/zbl_cl.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(OCL) -o $@ -c lal_zbl.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_zbl_ext.o: $(ALL_H) lal_zbl.h lal_zbl_ext.cpp lal_base_atomic.h
|
||||
$(OCL) -o $@ -c lal_zbl_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_cubic_cl.h: lal_lj_cubic.cu $(PRE1_H)
|
||||
$(BSH) ./geryon/file_to_cstr.sh lj_cubic $(PRE1_H) lal_lj_cubic.cu $(OBJ_DIR)/lj_cubic_cl.h;
|
||||
|
||||
$(OBJ_DIR)/lal_lj_cubic.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic.cpp $(OBJ_DIR)/lj_cubic_cl.h $(OBJ_DIR)/lj_cubic_cl.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(OCL) -o $@ -c lal_lj_cubic.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_lj_cubic_ext.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic_ext.cpp lal_base_atomic.h
|
||||
$(OCL) -o $@ -c lal_lj_cubic_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(BIN_DIR)/ocl_get_devices: ./geryon/ucl_get_devices.cpp
|
||||
$(OCL) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_OPENCL $(OCL_LINK)
|
||||
|
||||
|
|
|
@ -23,4 +23,4 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
|
|
|
@ -132,7 +132,7 @@ class UCL_Program {
|
|||
}
|
||||
|
||||
return UCL_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the default command queue/stream associated with this data
|
||||
inline command_queue & cq() { return _cq; }
|
||||
|
@ -315,6 +315,10 @@ class UCL_Kernel {
|
|||
/// Clear any arguments associated with the kernel
|
||||
inline void clear_args() { _num_args=0; }
|
||||
|
||||
/// Return the default command queue/stream associated with this kernel
|
||||
inline command_queue & cq() { return _cq; }
|
||||
/// Change the default command queue associated with this kernel
|
||||
inline void cq(command_queue &cq_in) { _cq=cq_in; }
|
||||
#include "ucl_arg_kludge.h"
|
||||
|
||||
private:
|
||||
|
@ -370,7 +374,7 @@ inline int UCL_Kernel::set_function(UCL_Program &program, const char *function)
|
|||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
return UCL_SUCCESS;
|
||||
}
|
||||
|
||||
|
|
|
@ -248,7 +248,7 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
|
|||
for (int i=vstart; i<iend; i++)
|
||||
virial[j]+=engv[i];
|
||||
if (_vf_atom)
|
||||
if (_ilist==NULL) {
|
||||
if (_ilist==NULL) {
|
||||
for (int i=vstart, ii=0; i<iend; i++)
|
||||
vatom[ii++][j]+=engv[i];
|
||||
} else {
|
||||
|
|
|
@ -147,7 +147,8 @@ void BaseThreeT::clear_atomic() {
|
|||
#ifdef THREE_CONCURRENT
|
||||
ans2->clear();
|
||||
assert(ucl_device->num_queues()==_end_command_queue+1);
|
||||
ucl_device->pop_command_queue();
|
||||
// ucl_device will clean up the command queue in its destructor
|
||||
// ucl_device->pop_command_queue();
|
||||
#endif
|
||||
device->clear();
|
||||
}
|
||||
|
@ -183,7 +184,7 @@ int * BaseThreeT::reset_nbors(const int nall, const int inum, const int nlist,
|
|||
// Build neighbor list on device
|
||||
// ---------------------------------------------------------------------------
|
||||
template <class numtyp, class acctyp>
|
||||
inline void BaseThreeT::build_nbor_list(const int inum, const int host_inum,
|
||||
inline int BaseThreeT::build_nbor_list(const int inum, const int host_inum,
|
||||
const int nall, double **host_x,
|
||||
int *host_type, double *sublo,
|
||||
double *subhi, tagint *tag,
|
||||
|
@ -193,7 +194,7 @@ inline void BaseThreeT::build_nbor_list(const int inum, const int host_inum,
|
|||
resize_atom(inum,nall,success);
|
||||
resize_local(nall,host_inum,nbor->max_nbors(),success);
|
||||
if (!success)
|
||||
return;
|
||||
return 1;
|
||||
atom->cast_copy_x(host_x,host_type);
|
||||
|
||||
int mn;
|
||||
|
@ -206,6 +207,7 @@ inline void BaseThreeT::build_nbor_list(const int inum, const int host_inum,
|
|||
#endif
|
||||
if (bytes>_max_an_bytes)
|
||||
_max_an_bytes=bytes;
|
||||
return mn;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
|
@ -28,6 +28,8 @@
|
|||
#include "geryon/nvd_texture.h"
|
||||
#endif
|
||||
|
||||
#define THREE_CONCURRENT
|
||||
|
||||
namespace LAMMPS_AL {
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
|
@ -129,7 +131,7 @@ class BaseThree {
|
|||
int *numj, int **firstneigh, bool &success);
|
||||
|
||||
/// Build neighbor list on device
|
||||
void build_nbor_list(const int inum, const int host_inum,
|
||||
int build_nbor_list(const int inum, const int host_inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, bool &success);
|
||||
|
|
|
@ -337,7 +337,7 @@ void DeviceT::init_message(FILE *screen, const char *name,
|
|||
#else
|
||||
std::string fs=toa(gpu->free_gigabytes())+"/";
|
||||
#endif
|
||||
|
||||
|
||||
if (_replica_me == 0 && screen) {
|
||||
fprintf(screen,"\n-------------------------------------");
|
||||
fprintf(screen,"-------------------------------------\n");
|
||||
|
@ -362,7 +362,7 @@ void DeviceT::init_message(FILE *screen, const char *name,
|
|||
sname=gpu->name(i)+", "+toa(gpu->cus(i))+" CUs, "+fs+
|
||||
toa(gpu->gigabytes(i))+" GB, "+toa(gpu->clock_rate(i))+" GHZ (";
|
||||
else
|
||||
sname=gpu->name(i)+", "+toa(gpu->cus(i))+" CUs, "+fs+
|
||||
sname=gpu->name(i)+", "+toa(gpu->cus(i))+" CUs, "+
|
||||
toa(gpu->clock_rate(i))+" GHZ (";
|
||||
if (sizeof(PRECISION)==4) {
|
||||
if (sizeof(ACC_PRECISION)==4)
|
||||
|
|
Loading…
Reference in New Issue