Updating lib/gpu to version 2.

git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@3785 f3b2605a-c512-4ea7-a41b-209d697bcdaa

parent 5ce854b780
commit 4ae4792b00
@@ -0,0 +1,72 @@
# /* ----------------------------------------------------------------------
#    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
#    http://lammps.sandia.gov, Sandia National Laboratories
#    Steve Plimpton, sjplimp@sandia.gov
#
#    Copyright (2003) Sandia Corporation.  Under the terms of Contract
#    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
#    certain rights in this software.  This software is distributed under
#    the GNU General Public License.
#
#    See the README file in the top-level LAMMPS directory.
# ------------------------------------------------------------------------- */
#
# /* ----------------------------------------------------------------------
#    Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
#                          Peng Wang (Nvidia), penwang@nvidia.com
#                          Paul Crozier (SNL), pscrozi@sandia.gov
# ------------------------------------------------------------------------- */

BIN_DIR = .
OBJ_DIR = .
AR = ar
CUDA_CPP = /cygdrive/c/CUDA/bin/nvcc -I/cygdrive/c/CUDA/include -O3 -DWINDLL -DUNIX -Xptxas -v --use_fast_math
CUDA_ARCH = -arch=sm_13
CUDA_PREC = -D_SINGLE_SINGLE
CUDA_LINK = -L/cygdrive/c/CUDA/lib -lcudart $(CUDA_LIB)

CUDA = $(CUDA_CPP) $(CUDA_ARCH) $(CUDA_PREC)

CUDA_LIB = $(OBJ_DIR)/gpu.dll

# Headers for CUDA Stuff
NVC_H = nvc_macros.h nvc_device.h nvc_timer.h nvc_memory.h nvc_traits.h
# Headers for Pair Stuff
PAIR_H = pair_gpu_texture.h pair_gpu_atom.h pair_gpu_nbor.h pair_gpu_cell.h
# Dependencies for the Texture Tar
TAR_H = $(NVC_H) $(PAIR_H) pair_gpu_atom.cu lj_gpu_memory.h lj_gpu_memory.cu \
        lj_gpu_kernel.h lj_gpu.cu gb_gpu_memory.h gb_gpu_memory.cu \
        gb_gpu_extra.h gb_gpu_kernel.h gb_gpu.cu

ALL_H = $(NVC_H) $(PAIR_H)

EXECS = $(BIN_DIR)/nvc_get_devices
OBJS = $(OBJ_DIR)/nvc_device.obj $(OBJ_DIR)/pair_gpu_nbor.obj \
       $(OBJ_DIR)/pair_tex_tar.obj $(OBJ_DIR)/pair_gpu_cell.obj

all: $(CUDA_LIB) $(EXECS)

$(OBJ_DIR)/nvc_device.obj : nvc_device.cu $(NVC_H)
	$(CUDA) -o $@ -c nvc_device.cu

$(OBJ_DIR)/pair_gpu_nbor.obj: pair_gpu_nbor.cu pair_gpu_texture.h pair_gpu_nbor.h $(NVC_H)
	$(CUDA) -o $@ -c pair_gpu_nbor.cu

$(OBJ_DIR)/pair_tex_tar.obj: $(TAR_H)
	$(CUDA) -o $@ -c pair_tex_tar.cu

$(OBJ_DIR)/pair_gpu_cell.obj: pair_gpu_cell.cu pair_gpu_cell.h lj_gpu_memory.h
	$(CUDA) -o $@ -c pair_gpu_cell.cu

$(BIN_DIR)/nvc_get_devices: nvc_get_devices.cu $(NVC_H) $(OBJ_DIR)/nvc_device.obj
	$(CUDA) -o $@ nvc_get_devices.cu $(CUDALNK) $(OBJ_DIR)/nvc_device.obj

$(CUDA_LIB): $(OBJS) $(TAR_H)
	$(CUDA) -o $@ -shared $(OBJS)

clean:
	rm -rf $(EXECS) $(CUDA_LIB) $(OBJS) *.exe *.exp *.lib *.dll *.linkinfo

veryclean: clean
	rm -rf *~ *.linkinfo
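Note: the pair_tex_tar.obj rule above lists whole .cu implementation files as dependencies because CUDA texture references must live in the same compilation unit as the kernels that read them. pair_tex_tar.cu itself is not shown in this diff; a plausible sketch of its contents, inferred only from the TAR_H dependency list, is:

// pair_tex_tar.cu -- hypothetical reconstruction (not part of this commit's diff):
// compile the texture-using implementation files as a single translation unit
// so the texture references in pair_gpu_texture.h bind to all kernels below.
#include "pair_gpu_atom.cu"
#include "lj_gpu_memory.cu"
#include "lj_gpu.cu"
#include "gb_gpu_memory.cu"
#include "gb_gpu.cu"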
@@ -1,25 +1,29 @@
#***************************************************************************
#                                 Makefile
#                          -------------------
#                           W. Michael Brown
#
#  _________________________________________________________________________
#    Build for the LAMMPS GPU Force Library
#
#  _________________________________________________________________________
#
#    begin                : Tue June 23 2009
#    copyright            : (C) 2009 by W. Michael Brown
#    email                : wmbrown@sandia.gov
# ***************************************************************************/
# /* ----------------------------------------------------------------------
#    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
#    http://lammps.sandia.gov, Sandia National Laboratories
#    Steve Plimpton, sjplimp@sandia.gov
#
#    Copyright (2003) Sandia Corporation.  Under the terms of Contract
#    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
#    certain rights in this software.  This software is distributed under
#    the GNU General Public License.
#
#    See the README file in the top-level LAMMPS directory.
# ------------------------------------------------------------------------- */
#
# /* ----------------------------------------------------------------------
#    Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
#                          Peng Wang (Nvidia), penwang@nvidia.com
#                          Paul Crozier (SNL), pscrozi@sandia.gov
# ------------------------------------------------------------------------- */

BIN_DIR = .
OBJ_DIR = .
AR = ar
CUDA_CPP = nvcc -I/usr/local/cuda/include -DUNIX -O3 -Xptxas -v --use_fast_math
CUDA_ARCH = -maxrregcount 128 #-arch=sm_13
CUDA_ARCH = -arch=sm_13
CUDA_PREC = -D_SINGLE_SINGLE
CUDA_LINK = -L/usr/local/cuda/lib64 -lcudart $(CUDA_LIB)
CUDA_LINK = -L/usr/local/cuda/lib -lcudart $(CUDA_LIB)

CUDA = $(CUDA_CPP) $(CUDA_ARCH) $(CUDA_PREC)

@@ -28,7 +32,7 @@ CUDA_LIB = $(OBJ_DIR)/libgpu.a
# Headers for CUDA Stuff
NVC_H = nvc_macros.h nvc_device.h nvc_timer.h nvc_memory.h nvc_traits.h
# Headers for Pair Stuff
PAIR_H = pair_gpu_texture.h pair_gpu_atom.h pair_gpu_nbor.h
PAIR_H = pair_gpu_texture.h pair_gpu_atom.h pair_gpu_nbor.h pair_gpu_cell.h
# Dependencies for the Texture Tar
TAR_H = $(NVC_H) $(PAIR_H) pair_gpu_atom.cu lj_gpu_memory.h lj_gpu_memory.cu \
        lj_gpu_kernel.h lj_gpu.cu gb_gpu_memory.h gb_gpu_memory.cu \

@@ -38,7 +42,7 @@ ALL_H = $(NVC_H) $(PAIR_H)

EXECS = $(BIN_DIR)/nvc_get_devices
OBJS = $(OBJ_DIR)/nvc_device.o $(OBJ_DIR)/pair_gpu_nbor.cu_o \
       $(OBJ_DIR)/pair_tex_tar.cu_o
       $(OBJ_DIR)/pair_tex_tar.cu_o $(OBJ_DIR)/pair_gpu_cell.cu_o

all: $(CUDA_LIB) $(EXECS)

@@ -51,6 +55,9 @@ $(OBJ_DIR)/pair_gpu_nbor.cu_o: pair_gpu_nbor.cu pair_gpu_texture.h pair_gpu_nbor
$(OBJ_DIR)/pair_tex_tar.cu_o: $(TAR_H)
	$(CUDA) -o $@ -c pair_tex_tar.cu

$(OBJ_DIR)/pair_gpu_cell.cu_o: pair_gpu_cell.cu pair_gpu_cell.h lj_gpu_memory.h
	$(CUDA) -o $@ -c pair_gpu_cell.cu

$(BIN_DIR)/nvc_get_devices: nvc_get_devices.cu $(NVC_H) $(OBJ_DIR)/nvc_device.o
	$(CUDA) -o $@ nvc_get_devices.cu $(CUDALNK) $(OBJ_DIR)/nvc_device.o
@@ -1,29 +1,25 @@
/***************************************************************************
                                  README
                          -------------------
                           W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  README for building LAMMPS GPU Library

 __________________________________________________________________________
    This file is part of the LAMMPS GPU Library
 __________________________________________________________________________

    begin                : Thu Jun 25 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

                                 GENERAL NOTES

This library, pair_gpu_lib.a, provides routines for GPGPU acceleration
This library, libgpu.a, provides routines for GPU acceleration
of LAMMPS pair styles. Currently, only CUDA enabled GPUs are
supported. Compilation of this library requires installing the CUDA
GPU driver and CUDA toolkit for your operating system. In addition to

@@ -33,14 +29,14 @@ devices on your system.

NOTE: Installation of the CUDA SDK is not required.

Current pair styles supporting GPU Accelartion:
Current pair styles supporting GPU acceleration:

  1. lj/cut/gpu
  2. gayberne/gpu

                         MULTIPLE LAMMPS PROCESSES

When using GPGPU acceleration, you are restricted to one physical GPU
When using GPU acceleration, you are restricted to one physical GPU
per LAMMPS process. This can be multiple GPUs on a single node or
across multiple nodes. Instructions on GPU assignment can be found in
the LAMMPS documentation.

@@ -66,6 +62,9 @@ the CUDA_PREC variable:

CUDA_PREC = -D_DOUBLE_DOUBLE  # Double precision for all calculations
CUDA_PREC = -D_SINGLE_DOUBLE  # Accumulation of forces, etc. in double

NOTE: For the lj/cut pair style, only single precision will be used, even
      if double precision is specified.

NOTE: Double precision is only supported on certain GPUs (with
      compute capability >= 1.3).
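Elsewhere in this commit the host code is instantiated as _lj_gpu<PRECISION,ACC_PRECISION>, so the -D_* flags above presumably select those two typedefs. A minimal sketch of that mapping (an assumption; the actual definitions live in a header that is not part of this diff):

// Hypothetical precision selection, assuming the CUDA_PREC flags map onto the
// PRECISION/ACC_PRECISION names used by lj_gpu.cu and gb_gpu.cu in this commit.
#ifdef _DOUBLE_DOUBLE
#define PRECISION double       // pairwise terms computed in double
#define ACC_PRECISION double   // forces/energies accumulated in double
#endif
#ifdef _SINGLE_DOUBLE
#define PRECISION float        // pairwise terms in single
#define ACC_PRECISION double   // accumulation in double
#endif
#ifdef _SINGLE_SINGLE
#define PRECISION float
#define ACC_PRECISION float
#endif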
@@ -74,14 +73,14 @@ NOTE: For Tesla and other graphics cards with compute capability>=1.3,

NOTE: The gayberne/gpu pair style will only be installed if the ASPHERE
      package has been installed before installing the GPU package in LAMMPS.

                                  GPU MEMORY

Upon initialization of the pair style, the library will reserve memory
for 64K atoms per GPU or 70% of each cards GPU memory, whichever value
Upon initialization of the gayberne/gpu pair style, the library will reserve
memory for 64K atoms per GPU or 70% of each cards GPU memory, whichever value
is limiting. The value of 70% can be changed by editing the
PERCENT_GPU_MEMORY definition in the source file. The value of 64K
cannot be increased and is the maximum number of atoms allowed per
PERCENT_GPU_MEMORY definition in the source file. For gayberne/gpu, the value
of 64K cannot be increased and is the maximum number of atoms allowed per
GPU. Using the 'neigh_modify one' modifier in your LAMMPS input script
can help to increase maximum number of atoms per GPU for cards with
limited memory.
@@ -1,27 +1,21 @@
/***************************************************************************
                                 gb_gpu.cu
                          -------------------
                           W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  Gay-Berne anisotropic potential GPU calcultation

   *** Force decomposition by Atom Version ***

 __________________________________________________________________________
    This file is part of the LAMMPS GPU Library
 __________________________________________________________________________

    begin                : Tue Jun 23 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#include <iostream>
#include <cassert>

@@ -190,30 +184,30 @@ inline string gb_gpu_toa(const t& in) {
// ---------------------------------------------------------------------------
// Return string with GPU info
// ---------------------------------------------------------------------------
string gb_gpu_name(const int id, const int max_nbors) {
  string name=GBMF[0].gpu.name(id)+", "+
EXTERN void gb_gpu_name(const int id, const int max_nbors, char * name) {
  string sname=GBMF[0].gpu.name(id)+", "+
              gb_gpu_toa(GBMF[0].gpu.cores(id))+" cores, "+
              gb_gpu_toa(GBMF[0].gpu.gigabytes(id))+" GB, "+
              gb_gpu_toa(GBMF[0].gpu.clock_rate(id))+" GHZ, "+
              gb_gpu_toa(GBMF[0].get_max_atoms(GBMF[0].gpu.bytes(id),
                                               max_nbors))+" Atoms";
  return name;
  strcpy(name,sname.c_str());
}
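The change above swaps a std::string return value for a caller-supplied char buffer, keeping the exported signature C-compatible so it can cross the gpu.dll boundary built by the Cygwin Makefile. A hypothetical caller (buffer size is an assumption; the diff does not specify one, and strcpy does not bounds-check):

// Hypothetical host-side usage of the new C-compatible signature.
char gpu_name[256];                    // caller owns the buffer; 256 is a guess
gb_gpu_name(0 /*device id*/, 128 /*max_nbors*/, gpu_name);
printf("Using GPU: %s\n", gpu_name);   // requires <cstdio>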
// ---------------------------------------------------------------------------
// Allocate memory on host and device and copy constants to device
// ---------------------------------------------------------------------------
bool gb_gpu_init(int &ij_size, const int ntypes, const double gamma,
                 const double upsilon, const double mu, double **shape,
                 double **well, double **cutsq, double **sigma,
                 double **epsilon, double *host_lshape, int **form,
                 double **host_lj1, double **host_lj2, double **host_lj3,
                 double **host_lj4, double **offset, double *special_lj,
                 const int max_nbors, const int thread, const int gpu_id) {
EXTERN bool gb_gpu_init(int &ij_size, const int ntypes, const double gamma,
                        const double upsilon, const double mu, double **shape,
                        double **well, double **cutsq, double **sigma,
                        double **epsilon, double *host_lshape, int **form,
                        double **host_lj1, double **host_lj2, double **host_lj3,
                        double **host_lj4, double **offset, double *special_lj,
                        const int max_nbors, const int thread, const int gpu_id) {
  assert(thread<MAX_GPU_THREADS);

  GBMF[thread].gpu.init();

  if (GBMF[thread].gpu.num_devices()==0)
    return false;

@@ -227,7 +221,7 @@ bool gb_gpu_init(int &ij_size, const int ntypes, const double gamma,
// ---------------------------------------------------------------------------
// Clear memory on host and device
// ---------------------------------------------------------------------------
void gb_gpu_clear(const int thread) {
EXTERN void gb_gpu_clear(const int thread) {
  GBMF[thread].clear();
}

@@ -262,7 +256,7 @@ inline void _gb_gpu_atom(PairGPUAtom<numtyp,acctyp> &atom, double **host_x,
  atom.time_atom.stop();
}

void gb_gpu_atom(double **host_x, double **host_quat,
EXTERN void gb_gpu_atom(double **host_x, double **host_quat,
                 const int *host_type, const bool rebuild, const int thread) {
  _gb_gpu_atom(GBMF[thread].atom, host_x, host_quat, host_type, rebuild,
               GBMF[thread].pair_stream);

@@ -327,7 +321,7 @@ int * _gb_gpu_reset_nbors(gbmtyp &gbm, const int nall, const int nlocal,
  return ilist;
}

int * gb_gpu_reset_nbors(const int nall, const int nlocal, const int inum,
EXTERN int * gb_gpu_reset_nbors(const int nall, const int nlocal, const int inum,
                         int *ilist, const int *numj, const int *type,
                         const int thread, bool &success) {
  return _gb_gpu_reset_nbors(GBMF[thread],nall,nlocal,inum,ilist,numj,type,

@@ -340,7 +334,7 @@ int * gb_gpu_reset_nbors(const int nall, const int nlocal, const int inum,
// ---------------------------------------------------------------------------
template <class gbmtyp>
void _gb_gpu_nbors(gbmtyp &gbm, const int *ij, const int num_ij,
                  const bool eflag) {
                   const bool eflag) {
  gbm.nbor.time_nbor.add_to_total();
  // CUDA_SAFE_CALL(cudaStreamSynchronize(gbm.pair_stream)); // Not if timed

@@ -350,8 +344,8 @@ void _gb_gpu_nbors(gbmtyp &gbm, const int *ij, const int num_ij,
  gbm.nbor.time_nbor.stop();
}

void gb_gpu_nbors(const int *ij, const int num_ij, const bool eflag,
                  const int thread) {
EXTERN void gb_gpu_nbors(const int *ij, const int num_ij, const bool eflag,
                         const int thread) {
  _gb_gpu_nbors(GBMF[thread],ij,num_ij,eflag);
}

@@ -453,7 +447,7 @@ void _gb_gpu_gayberne(GBMT &gbm, const bool eflag, const bool vflag,
  }
}

void gb_gpu_gayberne(const bool eflag, const bool vflag, const bool rebuild,
EXTERN void gb_gpu_gayberne(const bool eflag, const bool vflag, const bool rebuild,
                     const int thread) {
  _gb_gpu_gayberne<PRECISION,ACC_PRECISION>(GBMF[thread],eflag,vflag,rebuild);
}

@@ -490,7 +484,7 @@ double _gb_gpu_forces(GBMT &gbm, double **f, double **tor, const int *ilist,
  return evdw;
}

double gb_gpu_forces(double **f, double **tor, const int *ilist,
EXTERN double gb_gpu_forces(double **f, double **tor, const int *ilist,
                     const bool eflag, const bool vflag, const bool eflag_atom,
                     const bool vflag_atom, double *eatom, double **vatom,
                     double *virial, const int thread) {

@@ -499,7 +493,7 @@ double gb_gpu_forces(double **f, double **tor, const int *ilist,
                     vflag_atom,eatom,vatom,virial);
}

void gb_gpu_time(const int i) {
EXTERN void gb_gpu_time(const int i) {
  cout.precision(4);
  cout << "Atom copy: " << GBMF[i].atom.time_atom.total_seconds()
       << " s.\n"

@@ -515,10 +509,10 @@ void gb_gpu_time(const int i) {
       << " s.\n";
}

int gb_gpu_num_devices() {
EXTERN int gb_gpu_num_devices() {
  return GBMF[0].gpu.num_devices();
}

double gb_gpu_bytes() {
EXTERN double gb_gpu_bytes() {
  return GBMF[0].host_memory_usage();
}
@@ -1,25 +1,21 @@
/***************************************************************************
                               gb_gpu_extra.h
                          -------------------
                           W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  Inline GPU kernel routines ala math_extra for the CPU.

 __________________________________________________________________________
    This file is part of the LAMMPS GPU Library
 __________________________________________________________________________

    begin                : Tue Jun 23 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef GB_GPU_EXTRA_H
#define GB_GPU_EXTRA_H
@@ -1,26 +1,21 @@
/***************************************************************************
                              gb_gpu_kernel.cu
                          -------------------
                           W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  Routines that actually perform the force/torque computation

   *** Force Decomposition by Atom Version ***
 __________________________________________________________________________
    This file is part of the LAMMPS GPU Library
 __________________________________________________________________________

    begin                : Tue Jun 23 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef GB_GPU_KERNEL
#define GB_GPU_KERNEL
@@ -1,25 +1,21 @@
/***************************************************************************
                              gb_gpu_memory.cu
                          -------------------
                           W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  Global variables for GPU Gayberne Library

 __________________________________________________________________________
    This file is part of the LAMMPS GPU Library
 __________________________________________________________________________

    begin                : Thu Jun 25 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#include "gb_gpu_memory.h"
#define GB_GPU_MemoryT GB_GPU_Memory<numtyp, acctyp>
@@ -1,25 +1,21 @@
/***************************************************************************
                              gb_gpu_memory.h
                          -------------------
                           W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  Global variables for GPU Gayberne Library

 __________________________________________________________________________
    This file is part of the LAMMPS GPU Library
 __________________________________________________________________________

    begin                : Thu Jun 25 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef GB_GPU_MEMORY_H
#define GB_GPU_MEMORY_H
@@ -1,25 +1,21 @@
/***************************************************************************
                                 lj_gpu.cu
                          -------------------
                           W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  Lennard-Jones potential GPU calcultation

 __________________________________________________________________________
    This file is part of the LAMMPS GPU Library
 __________________________________________________________________________

    begin                : Tue Aug 4 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#include <iostream>
#include <cassert>

@@ -27,18 +23,39 @@
#include "nvc_timer.h"
#include "nvc_device.h"
#include "pair_gpu_texture.h"
#include "pair_gpu_cell.h"
#include "lj_gpu_memory.cu"
#include "lj_gpu_kernel.h"

#ifdef WINDLL
#include <windows.h>
BOOL APIENTRY DllMain(HANDLE hModule, DWORD dwReason, LPVOID lpReserved)
{
  return TRUE;
}
#endif

#ifdef WINDLL
#define EXTERN extern "C" __declspec(dllexport)
#else
#define EXTERN
#endif
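On Windows builds (-DWINDLL in Makefile.cygwin) the EXTERN macro marks every public entry point below as a C-linkage DLL export, and DllMain is the required, here trivial, DLL entry hook; on other platforms EXTERN expands to nothing. A consumer linking against gpu.dll would declare the same functions with matching linkage; a hedged sketch, since the LAMMPS-side declarations are not part of this diff:

// Hypothetical import-side declarations for gpu.dll (assumption: the host
// code declares the exports with plain C linkage).
extern "C" {
  void lj_gpu_name(const int id, const int max_nbors, char *name);
  void lj_gpu_time();
  double lj_gpu_bytes();
}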
using namespace std;

static LJ_GPU_Memory<PRECISION,ACC_PRECISION> LJMF;
#define LJMT LJ_GPU_Memory<numtyp,acctyp>

static float kernelTime = 0.0;
static int ncell1D;
static float *energy, *d_energy;
static float3 *d_force, *f_temp, *v_temp, *d_virial;
static cell_list cell_list_gpu;

// ---------------------------------------------------------------------------
// Convert something to a string
// ---------------------------------------------------------------------------
#include <sstream>

template <class t>
inline string lj_gpu_toa(const t& in) {
  ostringstream o;

@@ -50,113 +67,54 @@ inline string lj_gpu_toa(const t& in) {
// ---------------------------------------------------------------------------
// Return string with GPU info
// ---------------------------------------------------------------------------
string lj_gpu_name(const int id, const int max_nbors) {
  string name=LJMF.gpu.name(id)+", "+
EXTERN void lj_gpu_name(const int id, const int max_nbors, char * name) {
  string sname=LJMF.gpu.name(id)+", "+
              lj_gpu_toa(LJMF.gpu.cores(id))+" cores, "+
              lj_gpu_toa(LJMF.gpu.gigabytes(id))+" GB, "+
              lj_gpu_toa(LJMF.gpu.clock_rate(id))+" GHZ, "+
              lj_gpu_toa(LJMF.get_max_atoms(LJMF.gpu.bytes(id),
                                            max_nbors))+" Atoms";
  return name;
              lj_gpu_toa(LJMF.gpu.clock_rate(id))+" GHZ";
  strcpy(name,sname.c_str());
}

// ---------------------------------------------------------------------------
// Allocate memory on host and device and copy constants to device
// ---------------------------------------------------------------------------
bool lj_gpu_init(int &ij_size, const int ntypes, double **cutsq,double **sigma,
                 double **epsilon, double **host_lj1, double **host_lj2,
                 double **host_lj3, double **host_lj4, double **offset,
                 double *special_lj, const int max_nbors, const int gpu_id) {
  LJMF.gpu.init();
EXTERN bool lj_gpu_init(int &ij_size, const int ntypes, double **cutsq,double **sigma,
                        double **epsilon, double **host_lj1, double **host_lj2,
                        double **host_lj3, double **host_lj4, double **offset,
                        double *special_lj, double *boxlo, double *boxhi,
                        double cell_size, double skin,
                        const int max_nbors, const int gpu_id) {
  LJMF.gpu.init();
  if (LJMF.gpu.num_devices()==0)
    return false;

  ij_size=IJ_SIZE;
  return LJMF.init(ij_size, ntypes, cutsq, sigma, epsilon, host_lj1, host_lj2,
                   host_lj3, host_lj4, offset, special_lj, max_nbors, gpu_id);

  bool ret = LJMF.init(ij_size, ntypes, cutsq, sigma, epsilon, host_lj1, host_lj2,
                       host_lj3, host_lj4, offset, special_lj, max_nbors, gpu_id);

  ncell1D = ceil(((boxhi[0] - boxlo[0]) + 2.0*cell_size) / cell_size);

  init_cell_list_const(cell_size, skin, boxlo, boxhi);

  return ret;
}
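The grid dimension computed above pads the simulation box with one cell on each side for ghost atoms. For illustration (numbers invented): a box with boxhi[0] - boxlo[0] = 24.0 and cell_size = 3.0 gives ncell1D = ceil((24.0 + 6.0)/3.0) = 10, i.e. 8 interior cells plus a one-cell ghost border on each side, and the total cell count used later is ncell = ncell1D^3 = 1000. Note the grid is sized from the x extent only, apparently assuming a roughly cubic box.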
// ---------------------------------------------------------------------------
// Clear memory on host and device
// ---------------------------------------------------------------------------
void lj_gpu_clear() {
EXTERN void lj_gpu_clear() {
  free(energy);
  free(v_temp);
  cudaFreeHost(f_temp);
  cudaFree(d_force);
  cudaFree(d_energy);
  cudaFree(d_virial);
  clear_cell_list(cell_list_gpu);

  LJMF.clear();
}

// ---------------------------------------------------------------------------
// copy atom positions and optionally types to device
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
inline void _lj_gpu_atom(PairGPUAtom<numtyp,acctyp> &atom, double **host_x,
                         const int *host_type, const bool rebuild,
                         cudaStream_t &stream) {
  atom.time_atom.start();
  atom.reset_write_buffer();

  // First row of dev_x is x position, second is y, third is z
  atom.add_atom_data(host_x[0],3);
  atom.add_atom_data(host_x[0]+1,3);
  atom.add_atom_data(host_x[0]+2,3);

  int csize=3;

  // If a rebuild occured, copy type data
  if (rebuild) {
    atom.add_atom_data(host_type);
    csize++;
  }

  atom.copy_atom_data(csize,stream);
  atom.time_atom.stop();
}

void lj_gpu_atom(double **host_x, const int *host_type, const bool rebuild) {
  _lj_gpu_atom(LJMF.atom, host_x, host_type, rebuild, LJMF.pair_stream);
}

// ---------------------------------------------------------------------------
// Signal that we need to transfer a new neighbor list
// ---------------------------------------------------------------------------
template <class LJMTyp>
bool _lj_gpu_reset_nbors(LJMTyp &ljm, const int nall, const int inum,
                         int *ilist, const int *numj) {
  if (nall>ljm.max_atoms)
    return false;

  ljm.nbor.time_nbor.start();

  ljm.atom.nall(nall);
  ljm.atom.inum(inum);
  ljm.nbor.reset(inum,ilist,numj,ljm.pair_stream);

  ljm.nbor.time_nbor.stop();
  return true;
}

bool lj_gpu_reset_nbors(const int nall, const int inum, int *ilist,
                        const int *numj) {
  return _lj_gpu_reset_nbors(LJMF,nall,inum,ilist,numj);
}

// ---------------------------------------------------------------------------
// Copy a set of ij_size ij interactions to device and compute energies,
// forces, and torques for those interactions
// ---------------------------------------------------------------------------
template <class LJMTyp>
void _lj_gpu_nbors(LJMTyp &ljm, const int *ij, const int num_ij) {
  ljm.nbor.time_nbor.add_to_total();

  // CUDA_SAFE_CALL(cudaStreamSynchronize(ljm.pair_stream)); // Not if timed

  memcpy(ljm.nbor.host_ij.begin(),ij,num_ij*sizeof(int));
  ljm.nbor.time_nbor.start();
  ljm.nbor.add(num_ij,ljm.pair_stream);
  ljm.nbor.time_nbor.stop();
}

void lj_gpu_nbors(const int *ij, const int num_ij) {
  _lj_gpu_nbors(LJMF,ij,num_ij);
}

// ---------------------------------------------------------------------------
// Calculate energies and forces for all ij interactions

@@ -169,6 +127,7 @@ void _lj_gpu(LJMT &ljm, const bool eflag, const bool vflag, const bool rebuild){
  int GX=static_cast<int>(ceil(static_cast<double>(ljm.atom.inum())/BX));

  ljm.time_pair.start();

  if (ljm.shared_types)
    kernel_lj_fast<numtyp,acctyp><<<GX,BX,0,ljm.pair_stream>>>
      (ljm.special_lj.begin(), ljm.nbor.dev_nbor.begin(),

@@ -181,47 +140,279 @@ void _lj_gpu(LJMT &ljm, const bool eflag, const bool vflag, const bool rebuild){
       ljm.nbor.ij.begin(), ljm.nbor.dev_nbor.row_size(),
       ljm.atom.ans.begin(), ljm.atom.ans.row_size(), eflag,
       vflag, ljm.atom.inum(), ljm.atom.nall());
  ljm.time_pair.stop();
}

EXTERN void lj_gpu(const bool eflag, const bool vflag, const bool rebuild) {
  _lj_gpu<PRECISION,ACC_PRECISION>(LJMF, eflag,vflag,rebuild);
}

template <class numtyp, class acctyp>
double _lj_gpu_cell(LJMT &ljm, double **force, double *virial,
                    double **host_x, int *host_type, const int inum,
                    const int nall, const int ago, const bool eflag, const bool vflag,
                    const double *boxlo, const double *boxhi)
{
  ljm.atom.nall(nall);
  ljm.atom.inum(inum);

  ljm.nbor.time_nbor.start();
  ljm.nbor.time_nbor.stop();

  double evdwl=0.0;

  static int buffer = CELL_SIZE;
  static int ncell = (int)pow((float)ncell1D,3);

  static int first_call = 1;

  // allocate memory on CPU and GPU
  if (first_call) {
    energy = (float*) malloc(inum*sizeof(float));
    v_temp = (float3*)malloc(inum*2*sizeof(float3));
    cudaMallocHost((void**)&f_temp, inum*sizeof(float3));

    cudaMalloc((void**)&d_force, inum*sizeof(float3));
    cudaMalloc((void**)&d_energy, inum*sizeof(float));
    cudaMalloc((void**)&d_virial, inum*3*sizeof(float3));

    init_cell_list(cell_list_gpu, nall, ncell, buffer);

    first_call = 0;
  }

  if (!first_call && ago == 0) {
    free(energy);
    free(v_temp);
    cudaFreeHost(f_temp);
    cudaFree(d_force);
    cudaFree(d_energy);
    cudaFree(d_virial);

    energy = (float*) malloc(inum*sizeof(float));
    v_temp = (float3*)malloc(inum*2*sizeof(float3));
    cudaMallocHost((void**)&f_temp, inum*sizeof(float3));

    cudaMalloc((void**)&d_force, inum*sizeof(float3));
    cudaMalloc((void**)&d_energy, inum*sizeof(float));
    cudaMalloc((void**)&d_virial, inum*3*sizeof(float3));

    clear_cell_list(cell_list_gpu);
    init_cell_list(cell_list_gpu, nall, ncell, buffer);
  }

  // build cell-list on GPU
  ljm.atom.time_atom.start();
  build_cell_list(host_x[0], host_type, cell_list_gpu,
                  ncell, ncell1D, buffer, inum, nall, ago);
  ljm.atom.time_atom.stop();

  ljm.time_pair.start();

#ifdef TIMING
  cudaEvent_t start, stop;
  cudaEventCreate(&start);
  cudaEventCreate(&stop);
  cudaEventRecord(start, 0);
#endif

  // call the cell-list force kernel
  const int BX=BLOCK_1D;
  dim3 GX(ncell1D, ncell1D*ncell1D);
  if (eflag == 0 && vflag == 0) {
    kernel_lj_cell<false,false><<<GX, BX, 0>>>
      (d_force, d_energy, d_virial,
       cell_list_gpu.pos,
       cell_list_gpu.idx,
       cell_list_gpu.type,
       cell_list_gpu.natom,
       inum, nall, ncell);
  } else {
    kernel_lj_cell<true,true><<<GX, BX, 3*sizeof(float)*MAX_SHARED_TYPES*MAX_SHARED_TYPES>>>
      (d_force, d_energy, d_virial,
       cell_list_gpu.pos,
       cell_list_gpu.idx,
       cell_list_gpu.type,
       cell_list_gpu.natom,
       inum, nall, ncell);
  }
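The launch above selects a compile-time specialization of kernel_lj_cell: the <false,false> instantiation omits all energy/virial work, and only the <true,true> path requests the dynamic shared memory (the third launch argument) that backs the kernel's extern __shared__ array. A minimal sketch of this dispatch pattern, using a toy kernel that is not from this commit:

// Toy illustration of bool-template kernel dispatch (hypothetical example).
template<bool eflag>
__global__ void toy(float *f, float *e) {
  int i = blockIdx.x*blockDim.x + threadIdx.x;
  f[i] = 0.0f;
  if (eflag)          // constant at compile time; dead code in toy<false>
    e[i] = 0.0f;
}

void launch(bool eflag, float *f, float *e, int n) {
  dim3 g(n/128), b(128);
  if (eflag) toy<true><<<g,b>>>(f, e);
  else       toy<false><<<g,b>>>(f, e);
}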
#ifdef TIMING
  cudaEventRecord(stop, 0);
  cudaEventSynchronize(stop);
  float kTime;
  cudaEventElapsedTime(&kTime, start, stop);
  kernelTime += kTime;
  printf("kernelTime = %f, eflag=%d, vflag=%d\n", kTime, eflag, vflag);
  cudaEventDestroy(start);
  cudaEventDestroy(stop);
#endif

  // copy results from GPU to CPU
  cudaMemcpy(f_temp, d_force, inum*sizeof(float3), cudaMemcpyDeviceToHost);
  if (eflag) {
    cudaMemcpy(energy, d_energy, inum*sizeof(float), cudaMemcpyDeviceToHost);
    for (int i = 0; i < inum; i++) {
      evdwl += energy[i];
    }
    evdwl *= 0.5f;
  }
  if (vflag) {
    cudaMemcpy(v_temp, d_virial, inum*2*sizeof(float3), cudaMemcpyDeviceToHost);
    for (int i = 0; i < inum; i++) {
      virial[0] += v_temp[2*i].x;
      virial[1] += v_temp[2*i].y;
      virial[2] += v_temp[2*i].z;
      virial[3] += v_temp[2*i+1].x;
      virial[4] += v_temp[2*i+1].y;
      virial[5] += v_temp[2*i+1].z;
    }
    for (int i = 0; i < 6; i++)
      virial[i] *= 0.5f;
  }

  for (int i = 0; i < inum; i++) {
    force[i][0] += f_temp[i].x;
    force[i][1] += f_temp[i].y;
    force[i][2] += f_temp[i].z;
  }

  ljm.time_pair.stop();
}

void lj_gpu(const bool eflag, const bool vflag, const bool rebuild) {
  _lj_gpu<PRECISION,ACC_PRECISION>(LJMF,eflag,vflag,rebuild);
}

// ---------------------------------------------------------------------------
// Get energies and forces to host
// ---------------------------------------------------------------------------
template<class numtyp, class acctyp>
double _lj_gpu_forces(LJMT &ljm, double **f, const int *ilist,
                      const bool eflag, const bool vflag, const bool eflag_atom,
                      const bool vflag_atom, double *eatom, double **vatom,
                      double *virial) {
  double evdw;

  ljm.atom.time_answer.start();
  ljm.atom.copy_answers(eflag,vflag,ljm.pair_stream);

  ljm.atom.time_atom.add_to_total();
  ljm.nbor.time_nbor.add_to_total();
  ljm.time_pair.add_to_total();
  CUDA_SAFE_CALL(cudaStreamSynchronize(ljm.pair_stream));

  evdw=ljm.atom.energy_virial(ilist,eflag_atom,vflag_atom,eatom,vatom,virial);
  ljm.atom.add_forces(ilist,f);
  ljm.atom.time_answer.stop();
  ljm.atom.time_answer.add_to_total();
  return evdw;

  return evdwl;

}

double lj_gpu_forces(double **f, const int *ilist, const bool eflag,
                     const bool vflag, const bool eflag_atom,
                     const bool vflag_atom, double *eatom, double **vatom,
                     double *virial) {
  return _lj_gpu_forces<PRECISION,ACC_PRECISION>
    (LJMF,f,ilist,eflag,vflag,eflag_atom,vflag_atom,eatom,vatom,virial);
EXTERN double lj_gpu_cell(double **force, double *virial, double **host_x, int *host_type, const int inum, const int nall,
                          const int ago, const bool eflag, const bool vflag,
                          const double *boxlo, const double *boxhi)
{
  return _lj_gpu_cell<PRECISION,ACC_PRECISION>(LJMF, force, virial, host_x, host_type, inum, nall,
                                               ago, eflag, vflag, boxlo, boxhi);
}

void lj_gpu_time() {
template <class numtyp, class acctyp>
double _lj_gpu_n2(LJMT &ljm, double **force, double *virial,
                  double **host_x, int *host_type, const int inum, const int nall, const bool eflag, const bool vflag,
                  const double *boxlo, const double *boxhi)
{
  ljm.atom.nall(nall);
  ljm.atom.inum(inum);

  ljm.nbor.time_nbor.start();
  ljm.nbor.time_nbor.stop();

  double evdwl=0.0;

#ifdef NOUSE
  static int first_call = 1;

  if (first_call) {
    energy = (float*) malloc(inum*sizeof(float));
    v_temp = (float3*) malloc(inum*2*sizeof(float3));
    cudaMallocHost((void**)&f_temp, inum*sizeof(float3));
    cudaMallocHost((void**)&pos_temp, nall*sizeof(float3));
    cudaMalloc((void**)&d_force, inum*sizeof(float3));
    cudaMalloc((void**)&d_energy, inum*sizeof(float));
    cudaMalloc((void**)&d_virial, inum*3*sizeof(float3));
    cudaMalloc((void**)&d_pos, nall*sizeof(float3));
    cudaMalloc((void**)&d_type, nall*sizeof(int));
    first_call = 0;
  }

  ljm.atom.time_atom.start();
  double *atom_pos = host_x[0];
  for (int i = 0; i < 3*nall; i+=3) {
    pos_temp[i/3] = make_float3(atom_pos[i], atom_pos[i+1], atom_pos[i+2]);
  }
  cudaMemcpy(d_pos, pos_temp, nall*sizeof(float3), cudaMemcpyHostToDevice);
  cudaMemcpy(d_type, host_type, nall*sizeof(int), cudaMemcpyHostToDevice);

  ljm.atom.time_atom.stop();

  ljm.time_pair.start();

  // Compute the block size and grid size to keep all cores busy
  const int BX=BLOCK_1D;
  dim3 GX(static_cast<int>(ceil(static_cast<double>(inum)/BX)));

#ifdef TIMING
  cudaEvent_t start, stop;
  cudaEventCreate(&start);
  cudaEventCreate(&stop);
  cudaEventRecord(start, 0);
#endif

  // N^2 force kernel
  kernel_lj_n2<numtyp, acctyp><<<GX, BX>>>(d_force, d_energy, d_virial,
                                           d_pos, d_type, eflag, vflag, inum, nall);

#ifdef TIMING
  cudaEventRecord(stop, 0);
  cudaEventSynchronize(stop);
  float kTime;
  cudaEventElapsedTime(&kTime, start, stop);
  kernelTime += kTime;
  printf("kernelTime = %f, eflag=%d, vflag=%d\n", kTime, eflag, vflag);
  cudaEventDestroy(start);
  cudaEventDestroy(stop);
#endif

  // copy results from GPU to CPU
  cudaMemcpy(f_temp, d_force, inum*sizeof(float3), cudaMemcpyDeviceToHost);
  if (eflag) {
    cudaMemcpy(energy, d_energy, inum*sizeof(float), cudaMemcpyDeviceToHost);
    for (int i = 0; i < inum; i++) {
      evdwl += energy[i];
    }
    evdwl *= 0.5f;
  }
  if (vflag) {
    cudaMemcpy(v_temp, d_virial, inum*2*sizeof(float3), cudaMemcpyDeviceToHost);
    for (int i = 0; i < inum; i++) {
      virial[0] += v_temp[2*i].x;
      virial[1] += v_temp[2*i].y;
      virial[2] += v_temp[2*i].z;
      virial[3] += v_temp[2*i+1].x;
      virial[4] += v_temp[2*i+1].y;
      virial[5] += v_temp[2*i+1].z;
    }
    for (int i = 0; i < 6; i++)
      virial[i] *= 0.5f;
  }

  for (int i = 0; i < inum; i++) {
    force[i][0] += f_temp[i].x;
    force[i][1] += f_temp[i].y;
    force[i][2] += f_temp[i].z;
  }
#endif
  ljm.time_pair.stop();

  ljm.atom.time_atom.add_to_total();
  ljm.nbor.time_nbor.add_to_total();
  ljm.time_pair.add_to_total();

  return evdwl;
}

EXTERN double lj_gpu_n2(double **force, double *virial, double **host_x, int *host_type, const int inum, const int nall,
                        const bool eflag, const bool vflag,
                        const double *boxlo, const double *boxhi)
{
  return _lj_gpu_n2<PRECISION,ACC_PRECISION>(LJMF, force, virial, host_x, host_type, inum, nall,
                                             eflag, vflag, boxlo, boxhi);
}

EXTERN void lj_gpu_time() {
  cout.precision(4);
  cout << "Atom copy: " << LJMF.atom.time_atom.total_seconds() << " s.\n";
  cout << "Neighbor copy: " << LJMF.nbor.time_nbor.total_seconds() << " s.\n";

@@ -229,10 +420,10 @@ void lj_gpu_time() {
  cout << "Answer copy: " << LJMF.atom.time_answer.total_seconds() << " s.\n";
}

int lj_gpu_num_devices() {
EXTERN int lj_gpu_num_devices() {
  return LJMF.gpu.num_devices();
}

double lj_gpu_bytes() {
EXTERN double lj_gpu_bytes() {
  return LJMF.host_memory_usage();
}
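Taken together, the EXTERN functions above form the library's per-timestep driver interface for the cell-list path. A hypothetical host-side call sequence (variable names and the error helper are assumptions for illustration; the real LAMMPS caller is not part of this diff):

// Hypothetical driver loop; all variables assumed declared/filled elsewhere.
int ij_size;
if (!lj_gpu_init(ij_size, ntypes, cutsq, sigma, epsilon, lj1, lj2, lj3, lj4,
                 offset, special_lj, boxlo, boxhi, cell_size, skin,
                 max_nbors, gpu_id))
  error("no CUDA device found");            // error() is a placeholder

for (int step = 0; step < nsteps; ++step) {
  // ago == 0 signals a neighbor rebuild; device buffers are reallocated inside
  double evdwl = lj_gpu_cell(f, virial, x, type, inum, nall, ago,
                             eflag, vflag, boxlo, boxhi);
}

lj_gpu_time();   // print accumulated timers
lj_gpu_clear();  // release host and device memory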
@@ -1,29 +1,220 @@
/***************************************************************************
                              lj_gpu_kernel.cu
                          -------------------
                           W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  Routines that actually perform the force computation

 __________________________________________________________________________
    This file is part of the LAMMPS GPU Library
 __________________________________________________________________________

    begin                : Tue Aug 4 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef LJ_GPU_KERNEL
#define LJ_GPU_KERNEL

/* Cell list version of LJ kernel */
template<bool eflag, bool vflag>
__global__ void kernel_lj_cell(float3 *force3,
                               float *energy, float3 *virial,
                               float3 *cell_list, unsigned int *cell_idx,
                               int *cell_type, int *cell_atom,
                               const int inum, const int nall, const int ncell)
{
  // calculate 3D block idx from 2d block
  int bx = blockIdx.x;
  int by = blockIdx.y % gridDim.x;
  int bz = blockIdx.y / gridDim.x;

  int tid = threadIdx.x;

  // compute cell idx from 3D block idx
  int cid = bx + INT_MUL(by, gridDim.x) + INT_MUL(bz, gridDim.x*gridDim.x);

  __shared__ int typeSh[CELL_SIZE];
  __shared__ float posSh[CELL_SIZE*3];
  __shared__ float cutsqSh[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __shared__ float lj1Sh[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __shared__ float lj2Sh[MAX_SHARED_TYPES*MAX_SHARED_TYPES];

  extern __shared__ float smem[];

  __shared__ float *lj3Sh;
  __shared__ float *lj4Sh;
  __shared__ float *offsetSh;

  // load force parameters into shared memory
  for (int i = tid; i < MAX_SHARED_TYPES*MAX_SHARED_TYPES; i += BLOCK_1D) {
    int itype = i/MAX_SHARED_TYPES;
    int jtype = i%MAX_SHARED_TYPES;
    cutsqSh[i] = _cutsq_<float>(itype,jtype);
    lj1Sh[i] = _lj1_<float>(itype,jtype).x;
    lj2Sh[i] = _lj1_<float>(itype,jtype).y;
  }

  // Only allocate shared memory when needed,
  // this reduces shared memory limitation on occupancy
  if (eflag || vflag) {
    lj3Sh = smem;
    lj4Sh = lj3Sh + MAX_SHARED_TYPES*MAX_SHARED_TYPES;
    offsetSh = lj4Sh + MAX_SHARED_TYPES*MAX_SHARED_TYPES;
    for (int i = tid; i < MAX_SHARED_TYPES*MAX_SHARED_TYPES; i += BLOCK_1D) {
      int itype = i/MAX_SHARED_TYPES;
      int jtype = i%MAX_SHARED_TYPES;
      lj3Sh[i] = _lj3_<float>(itype,jtype).x+0.01;
      lj4Sh[i] = _lj3_<float>(itype,jtype).y;
      offsetSh[i]= _offset_<float>(itype,jtype);
    }
  }

  __syncthreads();

  int nborz0 = max(bz-1,0), nborz1 = min(bz+1, gridDim.x-1),
      nbory0 = max(by-1,0), nbory1 = min(by+1, gridDim.x-1),
      nborx0 = max(bx-1,0), nborx1 = min(bx+1, gridDim.x-1);

  for (int ii = 0; ii < ceil((float)(cell_atom[cid])/BLOCK_1D); ii++) {
    float3 f = {0.0f, 0.0f, 0.0f};
    float ener = 0.0f;
    float3 v0 = {0.0f, 0.0f, 0.0f}, v1 = {0.0f, 0.0f, 0.0f};
    int itype;
    float ix, iy, iz;
    int i = tid + ii*BLOCK_1D;
    unsigned int answer_pos = cell_idx[cid*CELL_SIZE+i];

    // load current cell atom position and type into sMem
    for (int j = tid; j < cell_atom[cid]; j += BLOCK_1D) {
      int pid = cid*CELL_SIZE + j;
      float3 pos = cell_list[pid];
      posSh[j            ] = pos.x;
      posSh[j+  CELL_SIZE] = pos.y;
      posSh[j+2*CELL_SIZE] = pos.z;
      typeSh[j] = cell_type[pid];
    }
    __syncthreads();
    if (answer_pos < inum) {
      itype = typeSh[i];
      ix = posSh[i            ];
      iy = posSh[i+  CELL_SIZE];
      iz = posSh[i+2*CELL_SIZE];

      // compute force from current cell
      for (int j = 0; j < cell_atom[cid]; j++) {
        if (j == i) continue;
        float delx = ix - posSh[j            ];
        float dely = iy - posSh[j+  CELL_SIZE];
        float delz = iz - posSh[j+2*CELL_SIZE];
        int jtype = typeSh[j];
        int mtype = itype + jtype*MAX_SHARED_TYPES;
        float r2inv = delx*delx + dely*dely + delz*delz;

        if (r2inv < cutsqSh[mtype]) {
          r2inv = 1.0f/r2inv;
          float r6inv = r2inv * r2inv * r2inv;
          float force = r2inv*r6inv*(lj1Sh[mtype]*r6inv - lj2Sh[mtype]);
          f.x += delx * force;
          f.y += dely * force;
          f.z += delz * force;

          if (eflag) {
            float e = r6inv*(lj3Sh[mtype]*r6inv - lj4Sh[mtype]);
            ener += (e - offsetSh[mtype]);
          }

          if (vflag) {
            v0.x += delx*delx*force;
            v0.y += dely*dely*force;
            v0.z += delz*delz*force;
            v1.x += delx*dely*force;
            v1.y += delx*delz*force;
            v1.z += dely*delz*force;
          }

        }
      }
    }
    __syncthreads();

    // compute force from neigboring cells
    for (int nborz = nborz0; nborz <= nborz1; nborz++) {
      for (int nbory = nbory0; nbory <= nbory1; nbory++) {
        for (int nborx = nborx0; nborx <= nborx1; nborx++) {
          if (nborz == bz && nbory == by && nborx == bx) continue;

          // compute cell id
          int cid_nbor = nborx + INT_MUL(nbory,gridDim.x) +
                         INT_MUL(nborz,gridDim.x*gridDim.x);

          // load neighbor cell position and type into smem
          for (int j = tid; j < cell_atom[cid_nbor]; j += BLOCK_1D) {
            int pid = INT_MUL(cid_nbor,CELL_SIZE) + j;
            float3 pos = cell_list[pid];
            posSh[j            ] = pos.x;
            posSh[j+  CELL_SIZE] = pos.y;
            posSh[j+2*CELL_SIZE] = pos.z;
            typeSh[j] = cell_type[pid];
          }
          __syncthreads();
          // compute force
          if (answer_pos < inum) {
            for (int j = 0; j < cell_atom[cid_nbor]; j++) {
              float delx = ix - posSh[j            ];
              float dely = iy - posSh[j+  CELL_SIZE];
              float delz = iz - posSh[j+2*CELL_SIZE];
              int jtype = typeSh[j];
              int mtype = itype + jtype*MAX_SHARED_TYPES;
              float r2inv = delx*delx + dely*dely + delz*delz;

              if (r2inv < cutsqSh[mtype]) {
                r2inv = 1.0f/r2inv;
                float r6inv = r2inv * r2inv * r2inv;
                float force = r2inv*r6inv*(lj1Sh[mtype]*r6inv - lj2Sh[mtype]);
                f.x += delx * force;
                f.y += dely * force;
                f.z += delz * force;

                if (eflag) {
                  float e=r6inv*(lj3Sh[mtype]*r6inv - lj4Sh[mtype]);
                  ener += (e-offsetSh[mtype]);
                }
                if (vflag) {
                  v0.x += delx*delx*force;
                  v0.y += dely*dely*force;
                  v0.z += delz*delz*force;
                  v1.x += delx*dely*force;
                  v1.y += delx*delz*force;
                  v1.z += dely*delz*force;
                }
              }
            }
          }
          __syncthreads();
        }
      }
    }

    if (answer_pos < inum) {
      force3[answer_pos] = f;
      if (eflag)
        energy[answer_pos] = ener;
      if (vflag) {
        virial[2*answer_pos] = v0;
        virial[2*answer_pos+1] = v1;
      }
    }
  }

}
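For reference, the pairwise expressions in kernel_lj_cell match the standard lj/cut form if the packed coefficients follow LAMMPS' usual convention (an assumption here; the host-side packing is not shown in this diff). Note also that r2inv first holds r^2 and is then reused for 1/r^2:

// Assumed coefficient packing (LAMMPS lj/cut convention):
//   lj1 = 48*eps*sig^12   lj2 = 24*eps*sig^6    (force)
//   lj3 =  4*eps*sig^12   lj4 =  4*eps*sig^6    (energy)
// With r2inv = 1/r^2 and r6inv = 1/r^6 as in the kernel:
//   force  = r2inv*r6inv*(lj1*r6inv - lj2)  ==  48*eps*sig^12/r^14 - 24*eps*sig^6/r^8
//   energy = r6inv*(lj3*r6inv - lj4)        ==  4*eps*((sig/r)^12 - (sig/r)^6)
// The kernel's "force" variable is F(r)/r, so multiplying by delx/dely/delz
// yields the Cartesian force components directly.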

/* Neighbor list version of LJ kernel */
template<class numtyp, class acctyp>
__global__ void kernel_lj(const numtyp *special_lj, const int *dev_nbor,
                          const int *dev_ij, const int nbor_pitch, acctyp *ans,

@@ -36,7 +227,6 @@ __global__ void kernel_lj(const numtyp *special_lj, const int *dev_nbor,
    if (ii<4)
      sp_lj[ii]=special_lj[ii];
  ii+=INT_MUL(blockIdx.x,blockDim.x);
  __syncthreads();

  if (ii<inum) {

@@ -158,7 +348,6 @@ __global__ void kernel_lj_fast(const numtyp *special_lj, const int *dev_nbor,
    }
  }
  ii+=INT_MUL(blockIdx.x,blockDim.x);
  __syncthreads();

  if (ii<inum) {

@@ -184,9 +373,11 @@ __global__ void kernel_lj_fast(const numtyp *special_lj, const int *dev_nbor,
    int itype=INT_MUL(MAX_SHARED_TYPES,_x_<numtyp>(i,3));

    numtyp factor_lj;

    for ( ; list<list_end; list++) {

      int j=*list;
      int j= *list;

      if (j < nall)
        factor_lj = 1.0;
      else {

@@ -247,4 +438,116 @@ __global__ void kernel_lj_fast(const numtyp *special_lj, const int *dev_nbor,
  } // if ii
}

/* Brute force O(N^2) version of LJ kernel */
template<class numtyp, class acctyp>
__global__ void kernel_lj_n2(float3 *force3,
                             float *energy, float3 *virial,
                             float3 *pos, int *type,
                             const bool eflag, const bool vflag,
                             const int inum, const int nall)
{
  int gid = threadIdx.x + INT_MUL(blockIdx.x, blockDim.x);
  int tid = threadIdx.x;
  __shared__ float posSh[BLOCK_1D*3];
  __shared__ int typeSh[BLOCK_1D];
  __shared__ numtyp cutsqSh[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __shared__ numtyp lj1Sh[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __shared__ numtyp lj2Sh[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __shared__ numtyp lj3Sh[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __shared__ numtyp lj4Sh[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __shared__ numtyp offsetSh[MAX_SHARED_TYPES*MAX_SHARED_TYPES];

  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
    int itype=tid/MAX_SHARED_TYPES;
    int jtype=tid%MAX_SHARED_TYPES;
    cutsqSh[tid]=_cutsq_<numtyp>(itype,jtype);
    lj1Sh[tid]=_lj1_<numtyp>(itype,jtype).x;
    lj2Sh[tid]=_lj1_<numtyp>(itype,jtype).y;
    lj3Sh[tid]=_lj3_<numtyp>(itype,jtype).x;
    lj4Sh[tid]=_lj3_<numtyp>(itype,jtype).y;
    offsetSh[tid]=_offset_<numtyp>(itype,jtype);
  }
  __syncthreads();
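  // NOTE (editor): with BLOCK_1D == 64 and MAX_SHARED_TYPES == 8 (see the
  // defines in lj_gpu_memory.h below), MAX_SHARED_TYPES*MAX_SHARED_TYPES is
  // exactly blockDim.x, so this single guarded pass lets every thread load
  // one mixed-type coefficient entry into shared memory before the sync.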

  float3 f = {0.0f, 0.0f, 0.0f};
  float ener = 0.0f;
  float3 v0 = {0.0f, 0.0f, 0.0f}, v1 = {0.0f, 0.0f, 0.0f};

  int itype, jtype;
  int mtype;

  numtyp ix, iy, iz;

  if (gid < inum) {
    ix = pos[gid].x;
    iy = pos[gid].y;
    iz = pos[gid].z;
    itype = type[gid];
  }

  int pid = tid;
  int nIter = ceil((float)nall/BLOCK_1D);
  for (int jj = 0; jj < nIter; jj++, pid += BLOCK_1D) {

    if (pid < nall) {
      posSh[tid           ] = pos[pid].x;
      posSh[tid+  BLOCK_1D] = pos[pid].y;
      posSh[tid+2*BLOCK_1D] = pos[pid].z;
      typeSh[tid] = type[pid];
    }
    __syncthreads();

    if (gid < inum) {
      int pid_j = jj*BLOCK_1D;

      for (int j = 0; j < BLOCK_1D; j++, pid_j++) {
        if (jj == blockIdx.x && tid == j) continue;
        if (pid_j < nall) {
          numtyp delx = ix - posSh[j           ];
          numtyp dely = iy - posSh[j+  BLOCK_1D];
          numtyp delz = iz - posSh[j+2*BLOCK_1D];
          jtype = typeSh[j];
          mtype = itype + jtype*MAX_SHARED_TYPES;
          numtyp r2inv = delx * delx + dely * dely + delz * delz;

          if (r2inv < cutsqSh[mtype]) {
            r2inv = (numtyp)1.0/r2inv;
            numtyp r6inv = r2inv * r2inv * r2inv;
            numtyp force = r2inv*r6inv*(lj1Sh[mtype]*r6inv - lj2Sh[mtype]);
            f.x += delx * force;
            f.y += dely * force;
            f.z += delz * force;

            if (eflag) {
              numtyp e = r6inv*(lj3Sh[mtype]*r6inv - lj4Sh[mtype]);
              ener += (e-offsetSh[mtype]);
            }
            if (vflag) {
              v0.x += delx*delx*force;
              v0.y += dely*dely*force;
              v0.z += delz*delz*force;
              v1.x += delx*dely*force;
              v1.y += delx*delz*force;
              v1.z += dely*delz*force;
            }
          }
        }
      }
    }

    __syncthreads();
  }

  if (gid < inum) {
    if (eflag)
      energy[gid] = ener;
    if (vflag) {
      virial[2*gid  ] = v0;
      virial[2*gid+1] = v1;
    }
    force3[gid] = f;
  }

}
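// NOTE (editor sketch): a minimal, hypothetical host-side launch of the
// O(N^2) kernel above, assuming device arrays d_force, d_energy, d_virial,
// d_pos and d_type are already allocated and filled (these names are
// illustrative, not part of this library):
//
//   dim3 block(BLOCK_1D);
//   dim3 grid((inum + BLOCK_1D - 1) / BLOCK_1D);  // one thread per i-atom
//   kernel_lj_n2<float,float><<<grid, block>>>(d_force, d_energy, d_virial,
//                                              d_pos, d_type, eflag, vflag,
//                                              inum, nall);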

#endif

@@ -1,25 +1,21 @@
/***************************************************************************
                               lj_gpu_memory.cu
                             -------------------
                              W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  Global variables for GPU Lennard-Jones Library

 __________________________________________________________________________
    This file is part of the LAMMPS GPU Library
 __________________________________________________________________________

    begin                : Tue Aug 4 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   ----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#include "lj_gpu_memory.h"
#define LJ_GPU_MemoryT LJ_GPU_Memory<numtyp, acctyp>

@@ -1,25 +1,21 @@
/***************************************************************************
                                lj_gpu_memory.h
                             -------------------
                              W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  Global variables for GPU Lennard-Jones Library

 __________________________________________________________________________
    This file is part of the LAMMPS GPU Library
 __________________________________________________________________________

    begin                : Tue Aug 4 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   ----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef LJ_GPU_MEMORY_H
#define LJ_GPU_MEMORY_H

@@ -30,8 +26,10 @@
#include "pair_gpu_nbor.h"

#define BLOCK_1D 64
#define CELL_SIZE 64
#define MAX_SHARED_TYPES 8
#define PERCENT_GPU_MEMORY 0.7
#define BIG_NUMBER 100000000

template <class numtyp, class acctyp>
class LJ_GPU_Memory {

@@ -1,25 +1,21 @@
/***************************************************************************
                                 nvc_device.cu
                             -------------------
                              W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  Utilities for dealing with cuda devices

 __________________________________________________________________________
    This file is part of the NVC Library
 __________________________________________________________________________

    begin                : Wed Jan 28 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   ----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#include <stdlib.h>
#include <stdio.h>

@@ -1,25 +1,21 @@
/***************************************************************************
                                 nvc_device.h
                             -------------------
                              W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  Utilities for dealing with cuda devices

 __________________________________________________________________________
    This file is part of the NVC Library
 __________________________________________________________________________

    begin                : Wed Jan 28 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   ----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef NVC_DEVICE
#define NVC_DEVICE

@@ -1,25 +1,21 @@
/***************************************************************************
                               nvc_get_devices.h
                             -------------------
                              W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  List properties of cuda devices

 __________________________________________________________________________
    This file is part of the NVC Library
 __________________________________________________________________________

    begin                : Wed Jan 28 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   ----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#include "nvc_device.h"

@@ -1,3 +1,22 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef NVC_MACROS_H
#define NVC_MACROS_H

@@ -1,25 +1,21 @@
/***************************************************************************
                                 nvc_memory.h
                             -------------------
                              W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  Routines for memory management on CUDA devices

 __________________________________________________________________________
    This file is part of the NVC Library
 __________________________________________________________________________

    begin                : Thu Jun 25 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   ----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef NVC_MEMORY_H
#define NVC_MEMORY_H

@@ -1,25 +1,21 @@
/***************************************************************************
                                  nvc_timer.h
                             -------------------
                              W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  Class for timing CUDA routines

 __________________________________________________________________________
    This file is part of the NVC Library
 __________________________________________________________________________

    begin                : Tue Feb 3 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   ----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef NVC_TIMER_H
#define NVC_TIMER_H

@@ -1,25 +1,21 @@
/***************************************************************************
                             nvc_texture_traits.h
                             -------------------
                              W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  Tricks for templating textures

 __________________________________________________________________________
    This file is part of the LAMMPS GPU Library
 __________________________________________________________________________

    begin                : Tue Jun 23 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   ----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef NVC_TEXTURE_TRAITS_H
#define NVC_TEXTURE_TRAITS_H

@@ -1,25 +1,21 @@
/***************************************************************************
                               pair_gpu_atom.cu
                             -------------------
                              W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  Memory routines for moving atom and force data between host and gpu

 __________________________________________________________________________
    This file is part of the LAMMPS GPU Library
 __________________________________________________________________________

    begin                : Tue Aug 4 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   ----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#include "pair_gpu_texture.h"
#include "pair_gpu_atom.h"

@@ -1,25 +1,21 @@
/***************************************************************************
                               pair_gpu_atom.h
                             -------------------
                              W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  Memory routines for moving atom and force data between host and gpu

 __________________________________________________________________________
    This file is part of the LAMMPS GPU Library
 __________________________________________________________________________

    begin                : Tue Aug 4 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   ----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef PAIR_GPU_ATOM_H
#define PAIR_GPU_ATOM_H

@@ -0,0 +1,454 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#include "lj_gpu_memory.h"
#include "pair_gpu_cell.h"

static __constant__ float d_boxlo[3];
static __constant__ float d_boxhi[3];
static __constant__ float d_cell_size[1];
static __constant__ float d_skin[1];

void init_cell_list_const(double cell_size, double skin,
                          double *boxlo, double *boxhi)
{
  float cell_size1 = cell_size;
  float skin1 = skin;
  float boxlo1[3], boxhi1[3];
  for (int i = 0; i < 3; i++) {
    boxlo1[i] = boxlo[i];
    boxhi1[i] = boxhi[i];
  }

  cudaMemcpyToSymbol(d_cell_size, &cell_size1, sizeof(float),
                     0, cudaMemcpyHostToDevice);
  cudaMemcpyToSymbol(d_boxlo, boxlo1, 3*sizeof(float),
                     0, cudaMemcpyHostToDevice);
  cudaMemcpyToSymbol(d_boxhi, boxhi1, 3*sizeof(float),
                     0, cudaMemcpyHostToDevice);
  cudaMemcpyToSymbol(d_skin, &skin1, sizeof(float),
                     0, cudaMemcpyHostToDevice);
}

__global__ void kernel_set_cell_list(unsigned int *cell_idx)
{
  unsigned int gid = threadIdx.x + blockIdx.x*blockDim.x;
  cell_idx[gid] = BIG_NUMBER;
}

// build the cell list
__global__ void kernel_build_cell_list(float3 *cell_list,
                                       unsigned int *cell_idx,
                                       int *cell_type,
                                       int *cell_atom,
                                       float3 *pos,
                                       int *type,
                                       const int inum,
                                       const int nall)
{
  unsigned int gid = threadIdx.x + blockIdx.x*blockDim.x;
  float cSize = d_cell_size[0];
  int ncell1D = ceil(((d_boxhi[0] - d_boxlo[0]) + 2.0f*cSize) / cSize);

  if (gid < nall) {
    float3 p = pos[gid];
    p.x = fmaxf(p.x, d_boxlo[0]-cSize);
    p.x = fminf(p.x, d_boxhi[0]+cSize);
    p.y = fmaxf(p.y, d_boxlo[1]-cSize);
    p.y = fminf(p.y, d_boxhi[1]+cSize);
    p.z = fmaxf(p.z, d_boxlo[2]-cSize);
    p.z = fminf(p.z, d_boxhi[2]+cSize);

    int cell_id = (int)(p.x/cSize + 1.0) + (int)(p.y/cSize + 1.0) * ncell1D
      + (int)(p.z/cSize + 1.0) * ncell1D * ncell1D;

    int atom_pos = atomicAdd(&cell_atom[cell_id], 1);
    int pid = cell_id*CELL_SIZE + atom_pos;

    cell_list[pid] = pos[gid];
    cell_type[pid] = type[gid];
    cell_idx [pid] = gid;

  }
}
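// NOTE (editor): positions are clamped into a one-cell-wide ghost shell
// around the box, and the +1.0 offsets in cell_id index into that shell, so
// ncell1D counts the interior cells plus two ghost layers per dimension.
// Each cell owns a fixed CELL_SIZE-slot range in cell_list; atomicAdd hands
// out the next free slot within the owning cell, so insertion order within
// a cell is nondeterministic (hence the index sort in build_cell_list).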

__global__ void kernel_test_rebuild(float3 *cell_list, int *cell_atom, int *rebuild)
{

  float cSize = d_cell_size[0];
  int ncell1D = ceil(((d_boxhi[0] - d_boxlo[0]) + 2.0f*cSize) / cSize);

  // calculate 3D block idx from 2d block
  int bx = blockIdx.x;
  int by = blockIdx.y % gridDim.x;
  int bz = blockIdx.y / gridDim.x;

  int tid = threadIdx.x;

  // compute cell idx from 3D block idx
  int cid = bx + INT_MUL(by, gridDim.x) + INT_MUL(bz, gridDim.x*gridDim.x);
  int pbase = INT_MUL(cid,CELL_SIZE); // atom position id in cell list

  float skin = d_skin[0];
  float lowx = d_boxlo[0] + (bx-1)*cSize - 0.5*skin;
  float hix  = lowx + cSize + skin;
  float lowy = d_boxlo[1] + (by-1)*cSize - 0.5*skin;
  float hiy  = lowy + cSize + skin;
  float lowz = d_boxlo[2] + (bz-1)*cSize - 0.5*skin;
  float hiz  = lowz + cSize + skin;

  for (int i = tid; i < cell_atom[cid]; i += BLOCK_1D) {
    int pid = pbase + i;
    float3 p = cell_list[pid];
    p.x = fmaxf(p.x, d_boxlo[0]-cSize);
    p.x = fminf(p.x, d_boxhi[0]+cSize);
    p.y = fmaxf(p.y, d_boxlo[1]-cSize);
    p.y = fminf(p.y, d_boxhi[1]+cSize);
    p.z = fmaxf(p.z, d_boxlo[2]-cSize);
    p.z = fminf(p.z, d_boxhi[2]+cSize);

    if (p.x < lowx || p.x > hix || p.y < lowy || p.y > hiy || p.z < lowz || p.z > hiz) {
      *rebuild = 1;
    }
  }

}
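// NOTE (editor): the rebuild test flags the list as stale as soon as any
// atom has drifted outside its cell's bounds expanded by half the neighbor
// skin on each side. Many blocks may write *rebuild concurrently, which is
// safe here because every writer stores the same value (1).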


__global__ void kernel_test_overflow(int *cell_atom, int *overflow, const int ncell)
{
  unsigned int gid = threadIdx.x + blockIdx.x*blockDim.x;

  if (gid < ncell) {
    if (cell_atom[gid] > CELL_SIZE)
      *overflow = 1;
  }
}

__global__ void kernel_copy_list(float3 *cell_list, unsigned int *cell_idx, int *cell_atom, float3 *pos)
{
  // calculate 3D block idx from 2d block
  int bx = blockIdx.x;
  int by = blockIdx.y % gridDim.x;
  int bz = blockIdx.y / gridDim.x;

  int tid = threadIdx.x;

  // compute cell idx from 3D block idx
  int cid = bx + INT_MUL(by, gridDim.x) + INT_MUL(bz, gridDim.x*gridDim.x);
  int pbase = INT_MUL(cid,CELL_SIZE); // atom position id in cell list

  for (int i = tid; i < cell_atom[cid]; i += BLOCK_1D) {
    int pid = pbase + i;
    cell_list[pid] = pos[cell_idx[pid]];
  }

}


__global__ void radixSortBlocks(unsigned int *keys, float3 *values1, int *values2, unsigned int nbits, unsigned int startbit);

void sortBlocks(unsigned int *keys, float3 *values1, int *values2, const int size)
{
  int i = 0;
  const unsigned int bitSize = sizeof(unsigned int)*8;
  const unsigned int bitStep = 4;
  const int gSize = size/BLOCK_1D;
  while (bitSize > i*bitStep) {
    radixSortBlocks<<<gSize, BLOCK_1D, 2*BLOCK_1D*sizeof(unsigned int)>>>(keys, values1, values2, bitStep, i*bitStep);
    i++;
  }
}
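// NOTE (editor): sortBlocks runs an LSD radix sort over the full 32-bit key,
// bitStep (4) bits per pass, eight passes in total, and each pass sorts only
// within one BLOCK_1D-sized block. That is exactly what the cell list needs:
// the keys are atom indices and, assuming the per-cell buffer equals
// BLOCK_1D (as the CELL_SIZE == BLOCK_1D defines above suggest), each block
// covers one cell, so atoms end up ordered by index within their cell.
// size must be a multiple of BLOCK_1D for the grid computation to be exact.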

#ifdef __DEVICE_EMULATION__
#define __SYNC __syncthreads();
#else
#define __SYNC
#endif


#define WARP_SIZE 32

template<class T, int maxlevel>
__device__ T scanwarp(T val, T* sData)
{
  // The following is the same as 2 * RadixSort::WARP_SIZE * warpId + threadInWarp =
  // 64*(threadIdx.x >> 5) + (threadIdx.x & (RadixSort::WARP_SIZE - 1))
  int idx = 2 * threadIdx.x - (threadIdx.x & (WARP_SIZE - 1));
  sData[idx] = 0;
  idx += WARP_SIZE;
  sData[idx] = val;      __SYNC

#ifdef __DEVICE_EMULATION__
  T t = sData[idx -  1]; __SYNC
  sData[idx] += t;       __SYNC
  t = sData[idx -  2];   __SYNC
  sData[idx] += t;       __SYNC
  t = sData[idx -  4];   __SYNC
  sData[idx] += t;       __SYNC
  t = sData[idx -  8];   __SYNC
  sData[idx] += t;       __SYNC
  t = sData[idx - 16];   __SYNC
  sData[idx] += t;       __SYNC
#else
  if (0 <= maxlevel) { sData[idx] += sData[idx - 1]; } __SYNC
  if (1 <= maxlevel) { sData[idx] += sData[idx - 2]; } __SYNC
  if (2 <= maxlevel) { sData[idx] += sData[idx - 4]; } __SYNC
  if (3 <= maxlevel) { sData[idx] += sData[idx - 8]; } __SYNC
  if (4 <= maxlevel) { sData[idx] += sData[idx -16]; } __SYNC
#endif

  return sData[idx] - val;  // convert inclusive -> exclusive
}
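// NOTE (editor): scanwarp is a Hillis-Steele inclusive scan over one warp,
// staged in shared memory with a zero-filled half-warp in front so the
// idx - 2^k reads never underflow; subtracting val at the end converts the
// inclusive result into the exclusive prefix sum the radix sort needs. The
// non-emulation path relies on implicit warp-synchronous execution, which
// was the convention for the sm_1x-era hardware this library targets.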

__device__ unsigned int scan(unsigned int idata)
{
  extern __shared__ unsigned int ptr[];

  unsigned int idx = threadIdx.x;

  unsigned int val = idata;

  val = scanwarp<unsigned int, 4>(val, ptr);
  __syncthreads();

  if ((idx & (WARP_SIZE - 1)) == WARP_SIZE - 1)
  {
    ptr[idx >> 5] = val + idata;
  }
  __syncthreads();

#ifndef __DEVICE_EMULATION__
  if (idx < WARP_SIZE)
#endif
  {
    ptr[idx] = scanwarp<unsigned int, 2>(ptr[idx], ptr);
  }
  __syncthreads();

  val += ptr[idx >> 5];

  return val;
}


__device__ unsigned int rank(unsigned int preds)
{
  unsigned int address = scan(preds);

  __shared__ unsigned int numtrue;
  if (threadIdx.x == BLOCK_1D - 1)
  {
    numtrue = address + preds;
  }
  __syncthreads();

  unsigned int rank;
  unsigned int idx = threadIdx.x;
  rank = (preds) ? address : numtrue + idx - address;

  return rank;
}
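// NOTE (editor): rank() is the classic radix-sort "split" primitive. scan()
// gives each thread the exclusive count of predicate-true threads before
// it, the last thread publishes the block-wide total, and every true
// element is ranked ahead of every false one while preserving relative
// order, which is what keeps each 1-bit pass of the sort below stable.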

__device__ void radixSortBlock(unsigned int *key, float3 *value1, int *value2, unsigned int nbits, unsigned int startbit)
{
  extern __shared__ unsigned int sMem1[];
  __shared__ float sMem2[BLOCK_1D];
  __shared__ int sMem3[BLOCK_1D];

  int tid = threadIdx.x;

  for (unsigned int shift = startbit; shift < (startbit + nbits); ++shift) {
    unsigned int lsb;
    lsb = !(((*key) >> shift) & 0x1);

    unsigned int r;

    r = rank(lsb);

    // This arithmetic strides the ranks across 4 CTA_SIZE regions
    sMem1[r] = *key;
    __syncthreads();

    // The above allows us to read without 4-way bank conflicts:
    *key = sMem1[tid];
    __syncthreads();

    sMem2[r] = (*value1).x;
    __syncthreads();
    (*value1).x = sMem2[tid];
    __syncthreads();

    sMem2[r] = (*value1).y;
    __syncthreads();
    (*value1).y = sMem2[tid];
    __syncthreads();

    sMem2[r] = (*value1).z;
    __syncthreads();
    (*value1).z = sMem2[tid];
    __syncthreads();

    sMem3[r] = *value2;
    __syncthreads();
    *value2 = sMem3[tid];
    __syncthreads();

  }

}

__global__ void radixSortBlocks(unsigned int *keys, float3 *values1, int *values2, unsigned int nbits, unsigned int startbit)
{

  extern __shared__ unsigned int sMem[];

  int gid = threadIdx.x + blockIdx.x * blockDim.x;
  unsigned int key;
  float3 value1;
  int value2;
  key = keys[gid];
  value1 = values1[gid];
  value2 = values2[gid];
  __syncthreads();

  radixSortBlock(&key, &value1, &value2, nbits, startbit);

  keys[gid] = key;
  values1[gid] = value1;
  values2[gid] = value2;
}
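// NOTE (editor): each thread carries one (key, float3, int) record through
// registers while the block permutes it bit by bit. The extern arrays named
// sMem and sMem1 both alias the single dynamic shared allocation of
// 2*BLOCK_1D*sizeof(unsigned int) bytes passed at the launch in sortBlocks
// above; that size is dictated by the zero-padded warp scan, not by the
// keys alone.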

static float3 *d_pos, *pos_temp;
static int *d_type;
static int *d_overflow, *d_rebuild;

void init_cell_list(cell_list &cell_list_gpu,
                    const int nall,
                    const int ncell,
                    const int buffer)
{
  cudaMalloc((void**)&(cell_list_gpu.pos),   ncell*buffer*sizeof(float3));
  cudaMalloc((void**)&(cell_list_gpu.idx),   ncell*buffer*sizeof(unsigned int));
  cudaMalloc((void**)&(cell_list_gpu.type),  ncell*buffer*sizeof(int));
  cudaMalloc((void**)&(cell_list_gpu.natom), ncell*sizeof(int));

  cudaMallocHost((void**)&pos_temp, nall*sizeof(float3));
  cudaMalloc((void**)&d_pos,      nall*sizeof(float3));
  cudaMalloc((void**)&d_type,     nall*sizeof(int));
  cudaMalloc((void**)&d_overflow, sizeof(int));
  cudaMalloc((void**)&d_rebuild,  sizeof(int));

  cudaMemset(cell_list_gpu.natom, 0, ncell*sizeof(int));
  cudaMemset(cell_list_gpu.pos,   0, ncell*buffer*sizeof(float3));
}

void clear_cell_list(cell_list &cell_list_gpu)
{
  cudaFree(cell_list_gpu.pos);
  cudaFree(cell_list_gpu.idx);
  cudaFree(cell_list_gpu.natom);
  cudaFree(cell_list_gpu.type);

  cudaFreeHost(pos_temp);
  cudaFree(d_pos);
  cudaFree(d_type);
  cudaFree(d_overflow);
  cudaFree(d_rebuild);
}


void build_cell_list(double *atom_pos, int *atom_type,
                     cell_list &cell_list_gpu,
                     const int ncell, const int ncell1D, const int buffer,
                     const int inum, const int nall, const int ago)
{
  cudaMemset(d_overflow, 0, sizeof(int));
  cudaMemset(d_rebuild, 0, sizeof(int));

  // copy position and type to GPU
  for (int i = 0; i < 3*nall; i+=3) {
    pos_temp[i/3] = make_float3(atom_pos[i], atom_pos[i+1], atom_pos[i+2]);
  }
  cudaMemcpy(d_pos, pos_temp, nall*sizeof(float3), cudaMemcpyHostToDevice);
  cudaMemcpy(d_type, atom_type, nall*sizeof(int), cudaMemcpyHostToDevice);

  static int first_build = 1;
  int rebuild = 0;

  // copy the last built cell-list and test whether it needs to be rebuilt
  if (!first_build) {
    dim3 block(BLOCK_1D);
    dim3 grid(ncell1D, ncell1D*ncell1D);
    kernel_copy_list<<<grid, block>>>(cell_list_gpu.pos,
                                      cell_list_gpu.idx,
                                      cell_list_gpu.natom, d_pos);
    cudaMemset(d_rebuild, 0, sizeof(int));
    kernel_test_rebuild<<<grid, block>>>(cell_list_gpu.pos,
                                         cell_list_gpu.natom,
                                         d_rebuild);
    cudaMemcpy(&rebuild, d_rebuild, sizeof(int), cudaMemcpyDeviceToHost);
  }

  /*if (!first_build) {
    dim3 block(BLOCK_1D);
    dim3 grid(ncell1D, ncell1D*ncell1D);
    kernel_copy_list<<<grid, block>>>(cell_list_gpu.pos,
                                      cell_list_gpu.idx,
                                      cell_list_gpu.natom, d_pos);
  }*/
  if (ago == 0) rebuild = 1;

  // build cell-list for the first time
  if (first_build || rebuild) {
    first_build = 0;
    // cout << "Building cell list..." << endl;
    cudaMemset(cell_list_gpu.natom, 0, ncell*sizeof(int));
    // initialize d_cell_idx for the sorting routine
    kernel_set_cell_list<<<ncell, buffer>>>(cell_list_gpu.idx);

    // build cell list
    dim3 blockDim(128);
    dim3 gridDim(static_cast<int>(ceil(static_cast<double>(nall)/blockDim.x)));
    kernel_build_cell_list<<<gridDim, blockDim>>>(cell_list_gpu.pos,
                                                  cell_list_gpu.idx,
                                                  cell_list_gpu.type,
                                                  cell_list_gpu.natom,
                                                  d_pos, d_type, inum, nall);

    // check cell list overflow
    int overflow;
    int gDimCell = static_cast<int>(ceil(static_cast<double>(ncell)/BLOCK_1D));
    kernel_test_overflow<<<gDimCell, BLOCK_1D>>>(cell_list_gpu.natom,
                                                 d_overflow, ncell);
    cudaMemcpy(&overflow, d_overflow, sizeof(int), cudaMemcpyDeviceToHost);
    if (overflow > 0) {
      printf("\n\nCELL_SIZE too small for cell list, please increase it!\n\n");
      exit(0);
    }

    // sort atoms in every cell by atom index to avoid floating point
    // associativity problems
    sortBlocks(cell_list_gpu.idx, cell_list_gpu.pos,
               cell_list_gpu.type, ncell*buffer);

    cudaThreadSynchronize();
  }

}
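// NOTE (editor sketch): the intended calling sequence for this cell-list
// API, reconstructed from the declarations in pair_gpu_cell.h below (the
// surrounding setup variables are illustrative, not part of this library):
//
//   init_cell_list_const(cell_size, skin, boxlo, boxhi);   // once per run
//   cell_list cl;
//   init_cell_list(cl, nall, ncell, CELL_SIZE);            // allocate
//   ...
//   build_cell_list(atom_pos, atom_type, cl, ncell, ncell1D,
//                   CELL_SIZE, inum, nall, ago);           // each reneighbor
//   ...
//   clear_cell_list(cl);                                   // teardown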

@@ -0,0 +1,62 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef PAIR_GPU_CELL_H
#define PAIR_GPU_CELL_H


typedef struct {
  float3 *pos;
  unsigned int *idx;
  int *type;
  int *natom;
} cell_list;

__global__ void kernel_set_cell_list(unsigned int *cell_idx);
__global__ void kernel_build_cell_list(float3 *cell_list,
                                       unsigned int *cell_idx,
                                       int *cell_type,
                                       int *cell_atom,
                                       float3 *pos,
                                       int *type,
                                       const int inum,
                                       const int nall);
__global__ void kernel_test_rebuild(float3 *cell_list, int *cell_atom, int *rebuild);
__global__ void kernel_copy_list(float3 *cell_list,
                                 unsigned int *cell_idx,
                                 int *cell_atom,
                                 float3 *pos);
__global__ void kernel_test_overflow(int *cell_atom, int *overflow, const int ncell);
void sortBlocks(unsigned int *keys, float3 *values1, int *values2, const int size);

void init_cell_list_const(double cell_size, double skin,
                          double *boxlo, double *boxhi);
void init_cell_list(cell_list &cell_list_gpu,
                    const int nall,
                    const int ncell,
                    const int buffer);

void build_cell_list(double *atom_pos, int *atom_type,
                     cell_list &cell_list_gpu,
                     const int ncell, const int ncell1D, const int buffer,
                     const int inum, const int nall, const int ago);

void clear_cell_list(cell_list &cell_list_gpu);

#endif

@@ -1,25 +1,21 @@
/***************************************************************************
                               pair_gpu_nbor.cu
                             -------------------
                              W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  Neighbor memory operations for LAMMPS GPU Library

 __________________________________________________________________________
    This file is part of the LAMMPS GPU Library
 __________________________________________________________________________

    begin                : Tue Aug 4 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   ----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#include "pair_gpu_nbor.h"

@@ -1,25 +1,21 @@
/***************************************************************************
                                pair_gpu_nbor.h
                             -------------------
                              W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  Neighbor memory operations for LAMMPS GPU Library

 __________________________________________________________________________
    This file is part of the LAMMPS GPU Library
 __________________________________________________________________________

    begin                : Tue Aug 4 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   ----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#ifndef PAIR_GPU_NBOR_H
#define PAIR_GPU_NBOR_H

@@ -1,25 +1,21 @@
/***************************************************************************
                              pair_gpu_texture.h
                             -------------------
                              W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  Tricks for templating textures

 __________________________________________________________________________
    This file is part of the LAMMPS GPU Library
 __________________________________________________________________________

    begin                : Tue Jun 23 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   ----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#include "nvc_traits.h"
#include "nvc_memory.h"

@@ -1,26 +1,21 @@
/***************************************************************************
                                pair_tex_tar.cu
                             -------------------
                              W. Michael Brown
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

  "Tar" of header and source files that need texture reference definitions
  within file scope.

 __________________________________________________________________________
    This file is part of the LAMMPS GPU Library
 __________________________________________________________________________

    begin                : Tue Jun 23 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   ----------------------------------------------------------------------- */

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
                         Peng Wang (Nvidia), penwang@nvidia.com
                         Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */

#include "pair_gpu_atom.cu"
#include "lj_gpu.cu"