git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@13906 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp 2015-08-19 15:15:32 +00:00
parent c85a42ab6d
commit 21f8abda24
10 changed files with 122 additions and 15 deletions

View File

@ -21,6 +21,8 @@ awpmd antisymmetrized wave packet molecular dynamics, AWPMD package
from Ilya Valuev (JIHT RAS)
colvars collective variable module (Metadynamics, ABF and more)
from Giacomo Fiorin and Jerome Henin (ICMS, Temple U)
compress hook to system lib for performing I/O compression, COMPRESS pkg
from Axel Kohlmeyer (Temple U)
cuda NVIDIA GPU routines, USER-CUDA package
from Christian Trott (U Tech Ilmenau)
gpu general GPU routines, GPU package

View File

@ -0,0 +1,21 @@
# This file contains the settings to build and link LAMMPS with
# support for data compression libraries.
#
# When you build LAMMPS with the COMPRESS package installed, it will
# use the 3 settings in this file. They should be set as follows.
#
# The compress_SYSLIB setting is for linking the compression library.
# By default, the setting will point to zlib (-lz).
#
# The compress_SYSINC and compress_SYSPATH variables do not typically need
# to be set, as compression libraries are usually installed as packages
# in system locations. Otherwise, specify the library directory via the
# compress_SYSPATH variable (e.g. -Ldir) and the header directory via the
# compress_SYSINC variable (e.g. -Idir).
# -----------------------------------------------------------
# Settings that the LAMMPS build will import when this package is installed
compress_SYSINC =
compress_SYSLIB = -lz
compress_SYSPATH =

View File

@ -68,8 +68,11 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
$(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
$(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \
$(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \
$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o
$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \
$(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \
$(OBJ_DIR)/lal_lj_cubic.o $(OBJ_DIR)/lal_lj_cubic_ext.o
CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
$(OBJ_DIR)/atom.cubin $(OBJ_DIR)/atom_cubin.h \
@ -117,8 +120,12 @@ CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
$(OBJ_DIR)/lj_coul_msm.cubin $(OBJ_DIR)/lj_coul_msm_cubin.h \
$(OBJ_DIR)/lj_gromacs.cubin $(OBJ_DIR)/lj_gromacs_cubin.h \
$(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd_cubin.h \
$(OBJ_DIR)/coul.cubin $(OBJ_DIR)/coul_cubin.h \
$(OBJ_DIR)/coul_debye.cubin $(OBJ_DIR)/coul_debye_cubin.h
$(OBJ_DIR)/tersoff.cubin $(OBJ_DIR)/tersoff_cubin.h \
$(OBJ_DIR)/coul.cubin $(OBJ_DIR)/coul_cubin.h \
$(OBJ_DIR)/coul_debye.cubin $(OBJ_DIR)/coul_debye_cubin.h \
$(OBJ_DIR)/zbl.cubin $(OBJ_DIR)/zbl_cubin.h \
$(OBJ_DIR)/lj_cubic.cubin $(OBJ_DIR)/lj_cubic_cubin.h
all: $(OBJ_DIR) $(GPU_LIB) $(EXECS)
@ -680,6 +687,18 @@ $(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cubin.h $(OB
# Compile lal_dpd_ext.cpp to an object file; $(OBJ_DIR) is added to the
# include search path.
$(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h
$(CUDR) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR)
# --- tersoff (CUDA build) ---------------------------------------------------

# Compile the kernel source lal_tersoff.cu to a cubin, with NV_KERNEL defined.
$(OBJ_DIR)/tersoff.cubin: lal_tersoff.cu lal_precision.h lal_tersoff_extra.h lal_preprocessor.h
	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_tersoff.cu

# Run $(BIN2C) on the cubin and capture its output as tersoff_cubin.h.
# The cubin is the only input, so it is listed once as a prerequisite.
$(OBJ_DIR)/tersoff_cubin.h: $(OBJ_DIR)/tersoff.cubin
	$(BIN2C) -c -n tersoff $(OBJ_DIR)/tersoff.cubin > $(OBJ_DIR)/tersoff_cubin.h

# Compile lal_tersoff.cpp; it depends on the generated tersoff_cubin.h, which
# is found through -I$(OBJ_DIR).
$(OBJ_DIR)/lal_tersoff.o: $(ALL_H) lal_tersoff.h lal_tersoff.cpp $(OBJ_DIR)/tersoff_cubin.h $(OBJ_DIR)/lal_base_three.o
	$(CUDR) -o $@ -c lal_tersoff.cpp -I$(OBJ_DIR)

# Compile lal_tersoff_ext.cpp to an object file.
$(OBJ_DIR)/lal_tersoff_ext.o: $(ALL_H) lal_tersoff.h lal_tersoff_ext.cpp lal_base_three.h
	$(CUDR) -o $@ -c lal_tersoff_ext.cpp -I$(OBJ_DIR)
# Compile the kernel source lal_coul.cu to a cubin, with NV_KERNEL defined.
$(OBJ_DIR)/coul.cubin: lal_coul.cu lal_precision.h lal_preprocessor.h
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_coul.cu
@ -704,6 +723,30 @@ $(OBJ_DIR)/lal_coul_debye.o: $(ALL_H) lal_coul_debye.h lal_coul_debye.cpp $(OBJ_
# Compile lal_coul_debye_ext.cpp to an object file; $(OBJ_DIR) is added to the
# include search path.
$(OBJ_DIR)/lal_coul_debye_ext.o: $(ALL_H) lal_coul_debye.h lal_coul_debye_ext.cpp lal_base_charge.h
$(CUDR) -o $@ -c lal_coul_debye_ext.cpp -I$(OBJ_DIR)
# --- zbl (CUDA build) -------------------------------------------------------

# Compile the kernel source lal_zbl.cu to a cubin, with NV_KERNEL defined.
$(OBJ_DIR)/zbl.cubin: lal_zbl.cu lal_precision.h lal_preprocessor.h
	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_zbl.cu

# Run $(BIN2C) on the cubin and capture its output as zbl_cubin.h.
# The cubin is the only input, so it is listed once as a prerequisite.
$(OBJ_DIR)/zbl_cubin.h: $(OBJ_DIR)/zbl.cubin
	$(BIN2C) -c -n zbl $(OBJ_DIR)/zbl.cubin > $(OBJ_DIR)/zbl_cubin.h

# Compile lal_zbl.cpp; it depends on the generated zbl_cubin.h, which is found
# through -I$(OBJ_DIR).
$(OBJ_DIR)/lal_zbl.o: $(ALL_H) lal_zbl.h lal_zbl.cpp $(OBJ_DIR)/zbl_cubin.h $(OBJ_DIR)/lal_base_atomic.o
	$(CUDR) -o $@ -c lal_zbl.cpp -I$(OBJ_DIR)

# Compile lal_zbl_ext.cpp to an object file.
$(OBJ_DIR)/lal_zbl_ext.o: $(ALL_H) lal_zbl.h lal_zbl_ext.cpp lal_base_atomic.h
	$(CUDR) -o $@ -c lal_zbl_ext.cpp -I$(OBJ_DIR)
# --- lj_cubic (CUDA build) --------------------------------------------------

# Compile the kernel source lal_lj_cubic.cu to a cubin, with NV_KERNEL defined.
$(OBJ_DIR)/lj_cubic.cubin: lal_lj_cubic.cu lal_precision.h lal_preprocessor.h
	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_cubic.cu

# Run $(BIN2C) on the cubin and capture its output as lj_cubic_cubin.h.
# The cubin is the only input, so it is listed once as a prerequisite.
$(OBJ_DIR)/lj_cubic_cubin.h: $(OBJ_DIR)/lj_cubic.cubin
	$(BIN2C) -c -n lj_cubic $(OBJ_DIR)/lj_cubic.cubin > $(OBJ_DIR)/lj_cubic_cubin.h

# Compile lal_lj_cubic.cpp; it depends on the generated lj_cubic_cubin.h,
# which is found through -I$(OBJ_DIR).
$(OBJ_DIR)/lal_lj_cubic.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic.cpp $(OBJ_DIR)/lj_cubic_cubin.h $(OBJ_DIR)/lal_base_atomic.o
	$(CUDR) -o $@ -c lal_lj_cubic.cpp -I$(OBJ_DIR)

# Compile lal_lj_cubic_ext.cpp to an object file.
$(OBJ_DIR)/lal_lj_cubic_ext.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic_ext.cpp lal_base_atomic.h
	$(CUDR) -o $@ -c lal_lj_cubic_ext.cpp -I$(OBJ_DIR)
# Build the nvc_get_devices executable from geryon/ucl_get_devices.cpp with
# UCL_CUDADR defined, linking $(CUDA_LIB) and -lcuda.
$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda

View File

@ -57,8 +57,11 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_answer.o \
$(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
$(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \
$(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \
$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o
$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \
$(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \
$(OBJ_DIR)/lal_lj_cubic.o $(OBJ_DIR)/lal_lj_cubic_ext.o
KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
$(OBJ_DIR)/neighbor_cpu_cl.h $(OBJ_DIR)/pppm_cl.h \
@ -82,7 +85,10 @@ KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
$(OBJ_DIR)/sw_cl.h $(OBJ_DIR)/beck_cl.h $(OBJ_DIR)/mie_cl.h \
$(OBJ_DIR)/soft_cl.h $(OBJ_DIR)/lj_coul_msm_cl.h \
$(OBJ_DIR)/lj_gromacs_cl.h $(OBJ_DIR)/dpd_cl.h \
$(OBJ_DIR)/coul_cl.h $(OBJ_DIR)/coul_debye_cl.h
$(OBJ_DIR)/lj_gauss_cl.h $(OBJ_DIR)/dzugutov_cl.h \
$(OBJ_DIR)/tersoff_cl.h $(OBJ_DIR)/coul_cl.h \
$(OBJ_DIR)/coul_debye_cl.h $(OBJ_DIR)/zbl_cl.h \
$(OBJ_DIR)/lj_cubic_cl.h
OCL_EXECS = $(BIN_DIR)/ocl_get_devices
@ -488,6 +494,15 @@ $(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cl.h $(OBJ_
# Compile lal_dpd_ext.cpp to an object file; $(OBJ_DIR) is added to the
# include search path.
$(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h
$(OCL) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR)
# --- tersoff (OpenCL build) -------------------------------------------------

# Generate tersoff_cl.h by passing $(PRE1_H), lal_tersoff_extra.h and
# lal_tersoff.cu through geryon/file_to_cstr.sh (presumably embedding the
# kernel source as a C string named "tersoff" -- confirm against the script).
$(OBJ_DIR)/tersoff_cl.h: lal_tersoff.cu lal_tersoff_extra.h $(PRE1_H)
	$(BSH) ./geryon/file_to_cstr.sh tersoff $(PRE1_H) lal_tersoff_extra.h lal_tersoff.cu $(OBJ_DIR)/tersoff_cl.h;

# Compile lal_tersoff.cpp; it depends on the generated tersoff_cl.h, which is
# found through -I$(OBJ_DIR). The header is listed once as a prerequisite.
$(OBJ_DIR)/lal_tersoff.o: $(ALL_H) lal_tersoff.h lal_tersoff.cpp $(OBJ_DIR)/tersoff_cl.h $(OBJ_DIR)/lal_base_three.o
	$(OCL) -o $@ -c lal_tersoff.cpp -I$(OBJ_DIR)

# Compile lal_tersoff_ext.cpp to an object file.
$(OBJ_DIR)/lal_tersoff_ext.o: $(ALL_H) lal_tersoff.h lal_tersoff_ext.cpp lal_base_three.h
	$(OCL) -o $@ -c lal_tersoff_ext.cpp -I$(OBJ_DIR)
# Generate coul_cl.h by passing $(PRE1_H) and lal_coul.cu through
# geryon/file_to_cstr.sh.
$(OBJ_DIR)/coul_cl.h: lal_coul.cu $(PRE1_H)
$(BSH) ./geryon/file_to_cstr.sh coul $(PRE1_H) lal_coul.cu $(OBJ_DIR)/coul_cl.h;
@ -506,6 +521,24 @@ $(OBJ_DIR)/lal_coul_debye.o: $(ALL_H) lal_coul_debye.h lal_coul_debye.cpp $(OBJ
# Compile lal_coul_debye_ext.cpp to an object file; $(OBJ_DIR) is added to the
# include search path.
$(OBJ_DIR)/lal_coul_debye_ext.o: $(ALL_H) lal_coul_debye.h lal_coul_debye_ext.cpp lal_base_charge.h
$(OCL) -o $@ -c lal_coul_debye_ext.cpp -I$(OBJ_DIR)
# --- zbl (OpenCL build) -----------------------------------------------------

# Generate zbl_cl.h by passing $(PRE1_H) and lal_zbl.cu through
# geryon/file_to_cstr.sh.
$(OBJ_DIR)/zbl_cl.h: lal_zbl.cu $(PRE1_H)
	$(BSH) ./geryon/file_to_cstr.sh zbl $(PRE1_H) lal_zbl.cu $(OBJ_DIR)/zbl_cl.h;

# Compile lal_zbl.cpp; it depends on the generated zbl_cl.h, which is found
# through -I$(OBJ_DIR). The header is listed once as a prerequisite.
$(OBJ_DIR)/lal_zbl.o: $(ALL_H) lal_zbl.h lal_zbl.cpp $(OBJ_DIR)/zbl_cl.h $(OBJ_DIR)/lal_base_atomic.o
	$(OCL) -o $@ -c lal_zbl.cpp -I$(OBJ_DIR)

# Compile lal_zbl_ext.cpp to an object file.
$(OBJ_DIR)/lal_zbl_ext.o: $(ALL_H) lal_zbl.h lal_zbl_ext.cpp lal_base_atomic.h
	$(OCL) -o $@ -c lal_zbl_ext.cpp -I$(OBJ_DIR)
# --- lj_cubic (OpenCL build) ------------------------------------------------

# Generate lj_cubic_cl.h by passing $(PRE1_H) and lal_lj_cubic.cu through
# geryon/file_to_cstr.sh.
$(OBJ_DIR)/lj_cubic_cl.h: lal_lj_cubic.cu $(PRE1_H)
	$(BSH) ./geryon/file_to_cstr.sh lj_cubic $(PRE1_H) lal_lj_cubic.cu $(OBJ_DIR)/lj_cubic_cl.h;

# Compile lal_lj_cubic.cpp; it depends on the generated lj_cubic_cl.h, which
# is found through -I$(OBJ_DIR). The header is listed once as a prerequisite.
$(OBJ_DIR)/lal_lj_cubic.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic.cpp $(OBJ_DIR)/lj_cubic_cl.h $(OBJ_DIR)/lal_base_atomic.o
	$(OCL) -o $@ -c lal_lj_cubic.cpp -I$(OBJ_DIR)

# Compile lal_lj_cubic_ext.cpp to an object file.
$(OBJ_DIR)/lal_lj_cubic_ext.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic_ext.cpp lal_base_atomic.h
	$(OCL) -o $@ -c lal_lj_cubic_ext.cpp -I$(OBJ_DIR)
# Build the ocl_get_devices executable from geryon/ucl_get_devices.cpp with
# UCL_OPENCL defined, linking with $(OCL_LINK).
$(BIN_DIR)/ocl_get_devices: ./geryon/ucl_get_devices.cpp
$(OCL) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_OPENCL $(OCL_LINK)

View File

@ -23,4 +23,4 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -132,7 +132,7 @@ class UCL_Program {
}
return UCL_SUCCESS;
}
}
/// Return the default command queue/stream associated with this data
inline command_queue & cq() { return _cq; }
@ -315,6 +315,10 @@ class UCL_Kernel {
/// Clear any arguments associated with the kernel (resets the argument count to zero)
inline void clear_args() { _num_args=0; }
/// Return the default command queue/stream associated with this kernel
inline command_queue & cq() { return _cq; }
/// Change the default command queue associated with this kernel
inline void cq(command_queue &cq_in) { _cq=cq_in; }
#include "ucl_arg_kludge.h"
private:
@ -370,7 +374,7 @@ inline int UCL_Kernel::set_function(UCL_Program &program, const char *function)
}
#endif
#endif
return UCL_SUCCESS;
}

View File

@ -248,7 +248,7 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
for (int i=vstart; i<iend; i++)
virial[j]+=engv[i];
if (_vf_atom)
if (_ilist==NULL) {
if (_ilist==NULL) {
for (int i=vstart, ii=0; i<iend; i++)
vatom[ii++][j]+=engv[i];
} else {

View File

@ -147,7 +147,8 @@ void BaseThreeT::clear_atomic() {
#ifdef THREE_CONCURRENT
ans2->clear();
assert(ucl_device->num_queues()==_end_command_queue+1);
ucl_device->pop_command_queue();
// ucl_device will clean up the command queue in its destructor
// ucl_device->pop_command_queue();
#endif
device->clear();
}
@ -183,7 +184,7 @@ int * BaseThreeT::reset_nbors(const int nall, const int inum, const int nlist,
// Build neighbor list on device
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
inline void BaseThreeT::build_nbor_list(const int inum, const int host_inum,
inline int BaseThreeT::build_nbor_list(const int inum, const int host_inum,
const int nall, double **host_x,
int *host_type, double *sublo,
double *subhi, tagint *tag,
@ -193,7 +194,7 @@ inline void BaseThreeT::build_nbor_list(const int inum, const int host_inum,
resize_atom(inum,nall,success);
resize_local(nall,host_inum,nbor->max_nbors(),success);
if (!success)
return;
return 1;
atom->cast_copy_x(host_x,host_type);
int mn;
@ -206,6 +207,7 @@ inline void BaseThreeT::build_nbor_list(const int inum, const int host_inum,
#endif
if (bytes>_max_an_bytes)
_max_an_bytes=bytes;
return mn;
}
// ---------------------------------------------------------------------------

View File

@ -28,6 +28,8 @@
#include "geryon/nvd_texture.h"
#endif
#define THREE_CONCURRENT
namespace LAMMPS_AL {
template <class numtyp, class acctyp>
@ -129,7 +131,7 @@ class BaseThree {
int *numj, int **firstneigh, bool &success);
/// Build neighbor list on device
void build_nbor_list(const int inum, const int host_inum,
int build_nbor_list(const int inum, const int host_inum,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, bool &success);

View File

@ -337,7 +337,7 @@ void DeviceT::init_message(FILE *screen, const char *name,
#else
std::string fs=toa(gpu->free_gigabytes())+"/";
#endif
if (_replica_me == 0 && screen) {
fprintf(screen,"\n-------------------------------------");
fprintf(screen,"-------------------------------------\n");
@ -362,7 +362,7 @@ void DeviceT::init_message(FILE *screen, const char *name,
sname=gpu->name(i)+", "+toa(gpu->cus(i))+" CUs, "+fs+
toa(gpu->gigabytes(i))+" GB, "+toa(gpu->clock_rate(i))+" GHZ (";
else
sname=gpu->name(i)+", "+toa(gpu->cus(i))+" CUs, "+fs+
sname=gpu->name(i)+", "+toa(gpu->cus(i))+" CUs, "+
toa(gpu->clock_rate(i))+" GHZ (";
if (sizeof(PRECISION)==4) {
if (sizeof(ACC_PRECISION)==4)