forked from lijiext/lammps
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@3056 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
bd0d78aeac
commit
5c4b5a82aa
|
@ -26,40 +26,30 @@ CUDA = $(CUDA_CPP) $(CUDA_ARCH) $(CUDA_PREC)
|
|||
CUDA_LIB = $(OBJ_DIR)/libgpu.a
|
||||
|
||||
# Headers for CUDA Stuff
|
||||
NVC_H = nvc_macros.h nvc_device.h nvc_timer.h nvc_memory.h
|
||||
NVC_H = nvc_macros.h nvc_device.h nvc_timer.h nvc_memory.h nvc_traits.h
|
||||
# Headers for Pair Stuff
|
||||
PAIR_H = pair_gpu_texture.h pair_gpu_atom.h pair_gpu_nbor.h
|
||||
# Dependencies for the Texture Tar
|
||||
TAR_H = $(NVC_H) $(PAIR_H) pair_gpu_atom.cu lj_gpu_memory.h lj_gpu_memory.cu \
|
||||
lj_gpu_kernel.h lj_gpu.cu gb_gpu_memory.h gb_gpu_memory.cu \
|
||||
gb_gpu_extra.h gb_gpu_kernel.h gb_gpu.cu
|
||||
|
||||
ALL_H = $(NVC_H) $(PAIR_H)
|
||||
|
||||
EXECS = $(BIN_DIR)/nvc_get_devices
|
||||
OBJS = $(OBJ_DIR)/nvc_device.o $(OBJ_DIR)/gb_gpu.cu_o \
|
||||
$(OBJ_DIR)/gb_gpu_memory.cu_o $(OBJ_DIR)/lj_gpu.cu_o \
|
||||
$(OBJ_DIR)/lj_gpu_memory.cu_o $(OBJ_DIR)/pair_gpu_atom.cu_o \
|
||||
$(OBJ_DIR)/pair_gpu_nbor.cu_o
|
||||
OBJS = $(OBJ_DIR)/nvc_device.o $(OBJ_DIR)/pair_gpu_nbor.cu_o \
|
||||
$(OBJ_DIR)/pair_tex_tar.cu_o
|
||||
|
||||
all: $(CUDA_LIB) $(EXECS)
|
||||
|
||||
$(OBJ_DIR)/nvc_device.o: nvc_device.cu $(NVC_H)
|
||||
$(CUDA) -o $@ -c nvc_device.cu
|
||||
|
||||
$(OBJ_DIR)/pair_gpu_atom.cu_o: pair_gpu_atom.cu pair_gpu_texture.h pair_gpu_atom.h $(NVC_H)
|
||||
$(CUDA) -o $@ -c pair_gpu_atom.cu
|
||||
|
||||
$(OBJ_DIR)/pair_gpu_nbor.cu_o: pair_gpu_nbor.cu pair_gpu_texture.h pair_gpu_nbor.h $(NVC_H)
|
||||
$(CUDA) -o $@ -c pair_gpu_nbor.cu
|
||||
|
||||
$(OBJ_DIR)/lj_gpu_memory.cu_o: lj_gpu_memory.cu lj_gpu_memory.h $(ALL_H)
|
||||
$(CUDA) -o $@ -c lj_gpu_memory.cu
|
||||
|
||||
$(OBJ_DIR)/lj_gpu.cu_o: lj_gpu.cu $(ALL_H) lj_gpu_memory.h lj_gpu_kernel.h
|
||||
$(CUDA) -o $@ -c lj_gpu.cu
|
||||
|
||||
$(OBJ_DIR)/gb_gpu_memory.cu_o: gb_gpu_memory.cu gb_gpu_memory.h $(ALL_H)
|
||||
$(CUDA) -o $@ -c gb_gpu_memory.cu
|
||||
|
||||
$(OBJ_DIR)/gb_gpu.cu_o: gb_gpu.cu $(ALL_H) gb_gpu_memory.h gb_gpu_kernel.h gb_gpu_extra.h
|
||||
$(CUDA) -o $@ -c gb_gpu.cu
|
||||
$(OBJ_DIR)/pair_tex_tar.cu_o: $(TAR_H)
|
||||
$(CUDA) -o $@ -c pair_tex_tar.cu
|
||||
|
||||
$(BIN_DIR)/nvc_get_devices: nvc_get_devices.cu $(NVC_H) $(OBJ_DIR)/nvc_device.o
|
||||
$(CUDA) -o $@ nvc_get_devices.cu $(CUDALNK) $(OBJ_DIR)/nvc_device.o
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
#include "nvc_macros.h"
|
||||
#include "nvc_timer.h"
|
||||
#include "nvc_device.h"
|
||||
#include "gb_gpu_memory.h"
|
||||
#include "gb_gpu_memory.cu"
|
||||
#include "gb_gpu_kernel.h"
|
||||
|
||||
using namespace std;
|
||||
|
|
|
@ -314,12 +314,12 @@ static __inline__ __device__ void gpu_quat_to_mat_trans(const int qi,
|
|||
numtyp i2 = qi4*qi4;
|
||||
numtyp j2 = qi5*qi5;
|
||||
numtyp k2 = qi6*qi6;
|
||||
numtyp twoij = 2.0*qi4*qi5;
|
||||
numtyp twoik = 2.0*qi4*qi6;
|
||||
numtyp twojk = 2.0*qi5*qi6;
|
||||
numtyp twoiw = 2.0*qi4*qi3;
|
||||
numtyp twojw = 2.0*qi5*qi3;
|
||||
numtyp twokw = 2.0*qi6*qi3;
|
||||
numtyp twoij = (numtyp)2.0*qi4*qi5;
|
||||
numtyp twoik = (numtyp)2.0*qi4*qi6;
|
||||
numtyp twojk = (numtyp)2.0*qi5*qi6;
|
||||
numtyp twoiw = (numtyp)2.0*qi4*qi3;
|
||||
numtyp twojw = (numtyp)2.0*qi5*qi3;
|
||||
numtyp twokw = (numtyp)2.0*qi6*qi3;
|
||||
|
||||
mat[0] = w2+i2-j2-k2;
|
||||
mat[3] = twoij-twokw;
|
||||
|
|
|
@ -153,7 +153,7 @@ __global__ void kernel_gayberne(const numtyp *gum, const numtyp *special_lj,
|
|||
|
||||
int j=*nbor;
|
||||
if (j < nall)
|
||||
factor_lj = 1.0;
|
||||
factor_lj = (numtyp)1.0;
|
||||
else {
|
||||
factor_lj = sp_lj[j/nall];
|
||||
j %= nall;
|
||||
|
@ -443,7 +443,7 @@ __global__ void kernel_sphere_gb(const numtyp *gum, const numtyp *special_lj,
|
|||
|
||||
int j=*nbor;
|
||||
if (j < nall)
|
||||
factor_lj = 1.0;
|
||||
factor_lj = (numtyp)1.0;
|
||||
else {
|
||||
factor_lj = sp_lj[j/nall];
|
||||
j %= nall;
|
||||
|
@ -675,7 +675,7 @@ __global__ void kernel_lj(const numtyp *special_lj, const int *dev_nbor,
|
|||
|
||||
int j=*list;
|
||||
if (j < nall)
|
||||
factor_lj = 1.0;
|
||||
factor_lj = (numtyp)1.0;
|
||||
else {
|
||||
factor_lj = sp_lj[j/nall];
|
||||
j %= nall;
|
||||
|
@ -799,7 +799,7 @@ __global__ void kernel_lj_fast(const numtyp *special_lj, const int *dev_nbor,
|
|||
|
||||
int j=*list;
|
||||
if (j < nall)
|
||||
factor_lj = 1.0;
|
||||
factor_lj = (numtyp)1.0;
|
||||
else {
|
||||
factor_lj = sp_lj[j/nall];
|
||||
j %= nall;
|
||||
|
|
|
@ -73,21 +73,21 @@ int* GB_GPU_MemoryT::init(const int ij_size, const int ntypes,
|
|||
host_write[2]=static_cast<numtyp>(mu);
|
||||
gamma_upsilon_mu.copy_from_host(host_write.begin());
|
||||
|
||||
lshape.safe_alloc(ntypes);
|
||||
lshape.safe_alloc(ntypes,lshape_get_texture<numtyp>());
|
||||
lshape.cast_copy(host_lshape,host_write);
|
||||
lshape.copy_from_host(host_write.begin());
|
||||
|
||||
// Copy shape, well, sigma, epsilon, and cutsq onto GPU
|
||||
shape.safe_alloc(ntypes,3);
|
||||
shape.safe_alloc(ntypes,3,shape_get_texture<numtyp>());
|
||||
shape.cast_copy(host_shape[0],host_write);
|
||||
well.safe_alloc(ntypes,3);
|
||||
well.safe_alloc(ntypes,3,well_get_texture<numtyp>());
|
||||
well.cast_copy(host_well[0],host_write);
|
||||
|
||||
// Copy LJ data onto GPU
|
||||
int lj_types=ntypes;
|
||||
if (lj_types<=MAX_SHARED_TYPES)
|
||||
lj_types=MAX_SHARED_TYPES;
|
||||
form.safe_alloc(lj_types,lj_types);
|
||||
form.safe_alloc(lj_types,lj_types,form_get_texture());
|
||||
form.copy_2Dfrom_host(host_form[0],ntypes,ntypes);
|
||||
|
||||
// See if we want fast GB-sphere or sphere-sphere calculations
|
||||
|
@ -100,12 +100,6 @@ int* GB_GPU_MemoryT::init(const int ij_size, const int ntypes,
|
|||
// Memory for ilist ordered by particle type
|
||||
host_olist.safe_alloc_rw(this->max_atoms);
|
||||
|
||||
// Bind constant data to textures
|
||||
lshape_bind_texture<numtyp>(lshape);
|
||||
shape_bind_texture<numtyp>(shape);
|
||||
well_bind_texture<numtyp>(well);
|
||||
form_bind_texture(form);
|
||||
|
||||
return this->nbor.host_ij.begin();
|
||||
}
|
||||
|
||||
|
@ -124,9 +118,7 @@ void GB_GPU_MemoryT::clear() {
|
|||
|
||||
LJ_GPU_MemoryT::clear();
|
||||
|
||||
shape_unbind_texture<numtyp>();
|
||||
well_unbind_texture<numtyp>();
|
||||
form_unbind_texture();
|
||||
lshape.unbind();
|
||||
|
||||
shape.clear();
|
||||
well.clear();
|
||||
|
|
|
@ -26,12 +26,11 @@
|
|||
|
||||
#define MAX_GPU_THREADS 4
|
||||
#include "lj_gpu_memory.h"
|
||||
#define LJ_GPU_MemoryT LJ_GPU_Memory<numtyp,acctyp>
|
||||
|
||||
enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE};
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class GB_GPU_Memory : public LJ_GPU_MemoryT {
|
||||
class GB_GPU_Memory : public LJ_GPU_Memory<numtyp,acctyp> {
|
||||
public:
|
||||
GB_GPU_Memory();
|
||||
~GB_GPU_Memory();
|
||||
|
|
|
@ -26,7 +26,8 @@
|
|||
#include "nvc_macros.h"
|
||||
#include "nvc_timer.h"
|
||||
#include "nvc_device.h"
|
||||
#include "lj_gpu_memory.h"
|
||||
#include "pair_gpu_texture.h"
|
||||
#include "lj_gpu_memory.cu"
|
||||
#include "lj_gpu_kernel.h"
|
||||
|
||||
using namespace std;
|
||||
|
|
|
@ -75,11 +75,11 @@ int* LJ_GPU_MemoryT::init(const int ij_size, const int ntypes,
|
|||
special_lj.cast_copy(host_special_lj,host_write);
|
||||
|
||||
// Copy sigma, epsilon, and cutsq onto GPU
|
||||
sigma.safe_alloc(ntypes,ntypes);
|
||||
sigma.safe_alloc(ntypes,ntypes,sigma_get_texture<numtyp>());
|
||||
sigma.cast_copy(host_sigma[0],host_write);
|
||||
epsilon.safe_alloc(ntypes,ntypes);
|
||||
epsilon.safe_alloc(ntypes,ntypes,epsilon_get_texture<numtyp>());
|
||||
epsilon.cast_copy(host_epsilon[0],host_write);
|
||||
cutsq.safe_alloc(ntypes,ntypes);
|
||||
cutsq.safe_alloc(ntypes,ntypes,cutsq_get_texture<numtyp>());
|
||||
cutsq.cast_copy(host_cutsq[0],host_write);
|
||||
|
||||
// If atom type constants fit in shared memory use fast kernel
|
||||
|
@ -89,35 +89,27 @@ int* LJ_GPU_MemoryT::init(const int ij_size, const int ntypes,
|
|||
lj_types=MAX_SHARED_TYPES;
|
||||
shared_types=true;
|
||||
}
|
||||
offset.safe_alloc(lj_types,lj_types);
|
||||
offset.safe_alloc(lj_types,lj_types,offset_get_texture<numtyp>());
|
||||
offset.cast_copy2D(host_offset[0],host_write,ntypes,ntypes);
|
||||
double *t1=host_lj1[0];
|
||||
double *t2=host_lj2[0];
|
||||
for (int i=0; i<lj_types*lj_types; i++) {
|
||||
for (int i=0; i<ntypes*ntypes; i++) {
|
||||
host_write[i*2]=t1[i];
|
||||
host_write[i*2+1]=t2[i];
|
||||
}
|
||||
lj1.safe_alloc(lj_types,lj_types);
|
||||
lj1.copy_2Dfrom_host(reinterpret_cast<typename cu_vec_traits<numtyp>::vec2 *> (host_write.begin()),
|
||||
lj1.safe_alloc(lj_types,lj_types,lj1_get_texture<numtyp>());
|
||||
lj1.copy_2Dfrom_host(reinterpret_cast<typename nvc_vec_traits<numtyp>::vec2 *> (host_write.begin()),
|
||||
ntypes,ntypes);
|
||||
t1=host_lj3[0];
|
||||
t2=host_lj4[0];
|
||||
for (int i=0; i<lj_types*lj_types; i++) {
|
||||
for (int i=0; i<ntypes*ntypes; i++) {
|
||||
host_write[i*2]=t1[i];
|
||||
host_write[i*2+1]=t2[i];
|
||||
}
|
||||
lj3.safe_alloc(lj_types,lj_types);
|
||||
lj3.copy_2Dfrom_host(reinterpret_cast<typename cu_vec_traits<numtyp>::vec2 *> (host_write.begin()),
|
||||
lj3.safe_alloc(lj_types,lj_types,lj3_get_texture<numtyp>());
|
||||
lj3.copy_2Dfrom_host(reinterpret_cast<typename nvc_vec_traits<numtyp>::vec2 *> (host_write.begin()),
|
||||
ntypes,ntypes);
|
||||
|
||||
// Bind constant data to textures
|
||||
sigma_bind_texture<numtyp>(sigma);
|
||||
epsilon_bind_texture<numtyp>(epsilon);
|
||||
cutsq_bind_texture<numtyp>(cutsq);
|
||||
offset_bind_texture<numtyp>(offset);
|
||||
lj1_bind_texture<typename cu_vec_traits<numtyp>::vec2>(lj1);
|
||||
lj3_bind_texture<typename cu_vec_traits<numtyp>::vec2>(lj3);
|
||||
|
||||
dev_error.safe_alloc(1);
|
||||
dev_error.zero();
|
||||
|
||||
|
@ -139,13 +131,6 @@ void LJ_GPU_MemoryT::clear() {
|
|||
atom.clear();
|
||||
nbor.clear();
|
||||
|
||||
sigma_unbind_texture<numtyp>();
|
||||
epsilon_unbind_texture<numtyp>();
|
||||
cutsq_unbind_texture<numtyp>();
|
||||
offset_unbind_texture<numtyp>();
|
||||
lj1_unbind_texture<typename cu_vec_traits<numtyp>::vec2>();
|
||||
lj3_unbind_texture<typename cu_vec_traits<numtyp>::vec2>();
|
||||
|
||||
CUDA_SAFE_CALL(cudaStreamDestroy(pair_stream));
|
||||
|
||||
dev_error.clear();
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#define LJ_GPU_MEMORY_H
|
||||
|
||||
#include "nvc_device.h"
|
||||
#include "nvc_traits.h"
|
||||
#include "pair_gpu_atom.h"
|
||||
#include "pair_gpu_nbor.h"
|
||||
|
||||
|
@ -70,7 +71,7 @@ class LJ_GPU_Memory {
|
|||
|
||||
// --------------- Const Data for Atoms
|
||||
NVC_ConstMatT sigma, epsilon, cutsq, offset;
|
||||
NVC_ConstMat< typename cu_vec_traits<numtyp>::vec2 > lj1, lj3;
|
||||
NVC_ConstMat< typename nvc_vec_traits<numtyp>::vec2 > lj1, lj3;
|
||||
NVC_VecT special_lj;
|
||||
|
||||
size_t max_atoms;
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#define NVC_MEMORY_H
|
||||
|
||||
#include <iostream>
|
||||
#include "nvc_macros.h"
|
||||
|
||||
#define NVC_HostT NVC_Host<numtyp>
|
||||
#define NVC_HostD NVC_Host<double>
|
||||
|
@ -196,6 +197,10 @@ class NVC_Vec {
|
|||
_end=_array+cols;
|
||||
}
|
||||
|
||||
// Row vector on device (allocate and assign texture and bind)
|
||||
inline void safe_alloc(const size_t cols, textureReference *t)
|
||||
{ safe_alloc(cols); assign_texture(t); bind(); }
|
||||
|
||||
/// Free any memory associated with device
|
||||
inline void clear()
|
||||
{ if (_cols>0) { _cols=0; CUDA_SAFE_CALL(cudaFree(_array)); } }
|
||||
|
@ -242,17 +247,22 @@ class NVC_Vec {
|
|||
copy_from_host(host_write.begin());
|
||||
}
|
||||
|
||||
/// Assign a texture to matrix
|
||||
inline void assign_texture(textureReference *t) { _tex_ptr=t; }
|
||||
|
||||
/// Bind to texture
|
||||
template <class texture>
|
||||
inline void bind_texture(texture &texi, cudaChannelFormatDesc &channel) {
|
||||
NVC::cuda_gb_get_channel<numtyp>(channel);
|
||||
texi.addressMode[0] = cudaAddressModeClamp;
|
||||
texi.addressMode[1] = cudaAddressModeClamp;
|
||||
texi.filterMode = cudaFilterModePoint;
|
||||
texi.normalized = false;
|
||||
CUDA_SAFE_CALL(cudaBindTexture(NULL,&texi,_array,&channel));
|
||||
inline void bind() {
|
||||
NVC::cuda_gb_get_channel<numtyp>(_channel);
|
||||
(*_tex_ptr).addressMode[0] = cudaAddressModeClamp;
|
||||
(*_tex_ptr).addressMode[1] = cudaAddressModeClamp;
|
||||
(*_tex_ptr).filterMode = cudaFilterModePoint;
|
||||
(*_tex_ptr).normalized = false;
|
||||
CUDA_SAFE_CALL(cudaBindTexture(NULL,_tex_ptr,_array,&_channel));
|
||||
}
|
||||
|
||||
/// Unbind texture
|
||||
inline void unbind() { CUDA_SAFE_CALL(cudaUnbindTexture(_tex_ptr)); }
|
||||
|
||||
/// Output the vector (debugging)
|
||||
inline void print(std::ostream &out) { print (out, numel()); }
|
||||
|
||||
|
@ -268,6 +278,8 @@ class NVC_Vec {
|
|||
private:
|
||||
numtyp *_array, *_end;
|
||||
size_t _row_bytes, _row_size, _rows, _cols;
|
||||
cudaChannelFormatDesc _channel;
|
||||
textureReference *_tex_ptr;
|
||||
};
|
||||
|
||||
/// 2D Matrix on device (can have extra column storage to get correct alignment)
|
||||
|
@ -340,7 +352,10 @@ class NVC_ConstMat {
|
|||
public:
|
||||
NVC_ConstMat() { _rows=0; }
|
||||
~NVC_ConstMat() { if (_rows>0) CUDA_SAFE_CALL(cudaFreeArray(_array)); }
|
||||
|
||||
|
||||
/// Assign a texture to matrix
|
||||
inline void assign_texture(textureReference *t) { _tex_ptr=t; }
|
||||
|
||||
/// Row major matrix on device
|
||||
inline void safe_alloc(const size_t rows, const size_t cols) {
|
||||
_rows=rows;
|
||||
|
@ -350,19 +365,31 @@ class NVC_ConstMat {
|
|||
CUDA_SAFE_CALL(cudaMallocArray(&_array, &_channel, cols, rows));
|
||||
}
|
||||
|
||||
/// Row major matrix on device (Allocate and bind texture)
|
||||
inline void safe_alloc(const size_t rows, const size_t cols,
|
||||
textureReference *t)
|
||||
{ safe_alloc(rows,cols); assign_texture(t); bind(); }
|
||||
|
||||
/// Bind to texture
|
||||
template <class texture>
|
||||
inline void bind_texture(texture &texi) {
|
||||
texi.addressMode[0] = cudaAddressModeClamp;
|
||||
texi.addressMode[1] = cudaAddressModeClamp;
|
||||
texi.filterMode = cudaFilterModePoint;
|
||||
texi.normalized = false;
|
||||
CUDA_SAFE_CALL(cudaBindTextureToArray(&texi,_array,&_channel));
|
||||
inline void bind() {
|
||||
(*_tex_ptr).addressMode[0] = cudaAddressModeClamp;
|
||||
(*_tex_ptr).addressMode[1] = cudaAddressModeClamp;
|
||||
(*_tex_ptr).filterMode = cudaFilterModePoint;
|
||||
(*_tex_ptr).normalized = false;
|
||||
CUDA_SAFE_CALL(cudaBindTextureToArray(_tex_ptr,_array,&_channel));
|
||||
}
|
||||
|
||||
/// Free any memory associated with device
|
||||
inline void clear()
|
||||
{ if (_rows>0) { _rows=0; CUDA_SAFE_CALL(cudaFreeArray(_array)); } }
|
||||
/// Unbind texture
|
||||
inline void unbind() { CUDA_SAFE_CALL(cudaUnbindTexture(_tex_ptr)); }
|
||||
|
||||
/// Free any memory associated with device and unbind
|
||||
inline void clear() {
|
||||
if (_rows>0) {
|
||||
_rows=0;
|
||||
CUDA_SAFE_CALL(cudaUnbindTexture(_tex_ptr));
|
||||
CUDA_SAFE_CALL(cudaFreeArray(_array));
|
||||
}
|
||||
}
|
||||
|
||||
inline size_t numel() const { return _cols*_rows; }
|
||||
inline size_t rows() const { return _rows; }
|
||||
|
@ -442,6 +469,7 @@ class NVC_ConstMat {
|
|||
size_t _rows, _cols;
|
||||
cudaArray *_array;
|
||||
cudaChannelFormatDesc _channel;
|
||||
textureReference *_tex_ptr;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
/***************************************************************************
|
||||
nvc_texture_traits.h
|
||||
-------------------
|
||||
W. Michael Brown
|
||||
|
||||
Tricks for templating textures
|
||||
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS GPU Library
|
||||
__________________________________________________________________________
|
||||
|
||||
begin : Tue Jun 23 2009
|
||||
copyright : (C) 2009 by W. Michael Brown
|
||||
email : wmbrown@sandia.gov
|
||||
***************************************************************************/
|
||||
|
||||
/* -----------------------------------------------------------------------
|
||||
Copyright (2009) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
#ifndef NVC_TEXTURE_TRAITS_H
|
||||
#define NVC_TEXTURE_TRAITS_H
|
||||
|
||||
template <class numtyp> class nvc_vec_traits;
|
||||
template <> class nvc_vec_traits<float> { public: typedef float2 vec2; };
|
||||
template <> class nvc_vec_traits<double> { public: typedef double2 vec2; };
|
||||
|
||||
#endif
|
|
@ -21,7 +21,9 @@
|
|||
the GNU General Public License.
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
#include "pair_gpu_texture.h"
|
||||
#include "pair_gpu_atom.h"
|
||||
|
||||
#define PairGPUAtomT PairGPUAtom<numtyp,acctyp>
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
|
@ -41,8 +43,7 @@ void PairGPUAtomT::init(const int max_atoms) {
|
|||
time_answer.init();
|
||||
|
||||
// Device matrices for atom and force data
|
||||
dev_x.safe_alloc(atom_fields(),max_atoms);
|
||||
x_bind_texture<numtyp>(dev_x);
|
||||
dev_x.safe_alloc(atom_fields(),max_atoms,x_get_texture<numtyp>());
|
||||
ans.safe_alloc(ans_fields(),max_atoms);
|
||||
|
||||
// Get a host write only buffer
|
||||
|
@ -59,7 +60,7 @@ void PairGPUAtomT::clear() {
|
|||
return;
|
||||
allocated=false;
|
||||
|
||||
x_unbind_texture<numtyp>();
|
||||
dev_x.unbind();
|
||||
ans.clear();
|
||||
host_write.clear();
|
||||
host_read.clear();
|
||||
|
|
|
@ -40,12 +40,12 @@
|
|||
|
||||
#ifndef PRECISION
|
||||
#define PRECISION float
|
||||
#define ACC_PRECISION double
|
||||
#define ACC_PRECISION float
|
||||
#define MAX_ATOMS 65536
|
||||
#endif
|
||||
|
||||
#include "nvc_timer.h"
|
||||
#include "pair_gpu_texture.h"
|
||||
#include "nvc_memory.h"
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class PairGPUAtom {
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
the GNU General Public License.
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
#include "nvc_traits.h"
|
||||
#include "nvc_memory.h"
|
||||
|
||||
#ifndef PAIR_GPU_TEXTURE_H
|
||||
|
@ -34,23 +35,20 @@
|
|||
#define GB_GPU_DOUBLE
|
||||
#endif
|
||||
|
||||
template <class numtyp> class cu_vec_traits;
|
||||
template <> class cu_vec_traits<float> { public: typedef float2 vec2; };
|
||||
template <> class cu_vec_traits<double> { public: typedef double2 vec2; };
|
||||
|
||||
// ------------------------------- x ------------------------------------
|
||||
|
||||
static texture<float, 2, cudaReadModeElementType> x_float_tex;
|
||||
static texture<int2, 2, cudaReadModeElementType> x_double_tex;
|
||||
template <class numtyp> inline void x_bind_texture(NVC_ConstMatT &mat)
|
||||
{ mat.bind_texture(x_float_tex); }
|
||||
|
||||
template <> inline void x_bind_texture<double>(NVC_ConstMatD &mat)
|
||||
{ mat.bind_texture(x_double_tex); }
|
||||
template <class numtyp> inline void x_unbind_texture()
|
||||
{ cudaUnbindTexture(x_float_tex); }
|
||||
template <> inline void x_unbind_texture<double>()
|
||||
{ cudaUnbindTexture(x_double_tex); }
|
||||
template <class numtyp> inline textureReference * x_get_texture() {
|
||||
const textureReference *ptr;
|
||||
cudaGetTextureReference(&ptr,"x_float_tex");
|
||||
return const_cast<textureReference *>(ptr);
|
||||
}
|
||||
template <> inline textureReference * x_get_texture<double>() {
|
||||
const textureReference *ptr;
|
||||
cudaGetTextureReference(&ptr,"x_double_tex");
|
||||
return const_cast<textureReference *>(ptr);
|
||||
}
|
||||
template <class numtyp>
|
||||
static __inline__ __device__ numtyp _x_(const int i, const int j) {
|
||||
return tex2D(x_float_tex,i,j);
|
||||
|
@ -66,10 +64,11 @@ static __inline__ __device__ double _x_<double>(const int i,const int j) {
|
|||
// ------------------------------- form ------------------------------------
|
||||
|
||||
static texture<int, 2, cudaReadModeElementType> form_tex;
|
||||
inline void form_bind_texture(NVC_ConstMatI &mat)
|
||||
{ mat.bind_texture(form_tex); }
|
||||
inline void form_unbind_texture()
|
||||
{ cudaUnbindTexture(form_tex); }
|
||||
inline textureReference * form_get_texture() {
|
||||
const textureReference *ptr;
|
||||
cudaGetTextureReference(&ptr,"form_tex");
|
||||
return const_cast<textureReference *>(ptr);
|
||||
}
|
||||
static __inline__ __device__ int _form_(const int i, const int j) {
|
||||
return tex2D(form_tex,i,j);
|
||||
}
|
||||
|
@ -78,15 +77,16 @@ static __inline__ __device__ int _form_(const int i, const int j) {
|
|||
|
||||
static texture<float, 1, cudaReadModeElementType> lshape_float_tex;
|
||||
static texture<int2, 1, cudaReadModeElementType> lshape_double_tex;
|
||||
static cudaChannelFormatDesc channel_lshape;
|
||||
template <class numtyp> inline void lshape_bind_texture(NVC_VecT &vec)
|
||||
{ vec.bind_texture(lshape_float_tex,channel_lshape); }
|
||||
template <> inline void lshape_bind_texture<double>(NVC_VecD &vec)
|
||||
{ vec.bind_texture(lshape_double_tex,channel_lshape); }
|
||||
template <class numtyp> inline void lshape_unbind_texture()
|
||||
{ cudaUnbindTexture(lshape_float_tex); }
|
||||
template <> inline void lshape_unbind_texture<double>()
|
||||
{ cudaUnbindTexture(lshape_double_tex); }
|
||||
template <class numtyp> inline textureReference * lshape_get_texture() {
|
||||
const textureReference *ptr;
|
||||
cudaGetTextureReference(&ptr,"lshape_float_tex");
|
||||
return const_cast<textureReference *>(ptr);
|
||||
}
|
||||
template <> inline textureReference * lshape_get_texture<double>() {
|
||||
const textureReference *ptr;
|
||||
cudaGetTextureReference(&ptr,"lshape_double_tex");
|
||||
return const_cast<textureReference *>(ptr);
|
||||
}
|
||||
template <class numtyp>
|
||||
static __inline__ __device__ numtyp _lshape_(const int i)
|
||||
{ return tex1Dfetch(lshape_float_tex,i); }
|
||||
|
@ -102,14 +102,16 @@ static __inline__ __device__ double _lshape_<double>(const int i) {
|
|||
|
||||
static texture<float, 2, cudaReadModeElementType> shape_float_tex;
|
||||
static texture<int2, 2, cudaReadModeElementType> shape_double_tex;
|
||||
template <class numtyp> inline void shape_bind_texture(NVC_ConstMatT &mat)
|
||||
{ mat.bind_texture(shape_float_tex); }
|
||||
template <> inline void shape_bind_texture<double>(NVC_ConstMatD &mat)
|
||||
{ mat.bind_texture(shape_double_tex); }
|
||||
template <class numtyp> inline void shape_unbind_texture()
|
||||
{ cudaUnbindTexture(shape_float_tex); }
|
||||
template <> inline void shape_unbind_texture<double>()
|
||||
{ cudaUnbindTexture(shape_double_tex); }
|
||||
template <class numtyp> inline textureReference * shape_get_texture() {
|
||||
const textureReference *ptr;
|
||||
cudaGetTextureReference(&ptr,"shape_float_tex");
|
||||
return const_cast<textureReference *>(ptr);
|
||||
}
|
||||
template <> inline textureReference * shape_get_texture<double>() {
|
||||
const textureReference *ptr;
|
||||
cudaGetTextureReference(&ptr,"shape_double_tex");
|
||||
return const_cast<textureReference *>(ptr);
|
||||
}
|
||||
template <class numtyp>
|
||||
static __inline__ __device__ numtyp _shape_(const int i, const int j) {
|
||||
return tex2D(shape_float_tex,j,i);
|
||||
|
@ -126,14 +128,16 @@ static __inline__ __device__ double _shape_<double>(const int i, const int j) {
|
|||
|
||||
static texture<float, 2, cudaReadModeElementType> well_float_tex;
|
||||
static texture<int2, 2, cudaReadModeElementType> well_double_tex;
|
||||
template <class numtyp> inline void well_bind_texture(NVC_ConstMatT &mat)
|
||||
{ mat.bind_texture(well_float_tex); }
|
||||
template <> inline void well_bind_texture<double>(NVC_ConstMatD &mat)
|
||||
{ mat.bind_texture(well_double_tex); }
|
||||
template <class numtyp> inline void well_unbind_texture()
|
||||
{ cudaUnbindTexture(well_float_tex); }
|
||||
template <> inline void well_unbind_texture<double>()
|
||||
{ cudaUnbindTexture(well_double_tex); }
|
||||
template <class numtyp> inline textureReference * well_get_texture() {
|
||||
const textureReference *ptr;
|
||||
cudaGetTextureReference(&ptr,"well_float_tex");
|
||||
return const_cast<textureReference *>(ptr);
|
||||
}
|
||||
template <> inline textureReference * well_get_texture<double>() {
|
||||
const textureReference *ptr;
|
||||
cudaGetTextureReference(&ptr,"well_double_tex");
|
||||
return const_cast<textureReference *>(ptr);
|
||||
}
|
||||
template <class numtyp>
|
||||
static __inline__ __device__ numtyp _well_(const int i, const int j)
|
||||
{ return tex2D(well_float_tex,j,i); }
|
||||
|
@ -149,14 +153,16 @@ static __inline__ __device__ double _well_<double>(const int i,const int j) {
|
|||
|
||||
static texture<float, 2, cudaReadModeElementType> sigma_float_tex;
|
||||
static texture<int2, 2, cudaReadModeElementType> sigma_double_tex;
|
||||
template <class numtyp> inline void sigma_bind_texture(NVC_ConstMatT &mat)
|
||||
{ mat.bind_texture(sigma_float_tex); }
|
||||
template <> inline void sigma_bind_texture<double>(NVC_ConstMatD &mat)
|
||||
{ mat.bind_texture(sigma_double_tex); }
|
||||
template <class numtyp> inline void sigma_unbind_texture()
|
||||
{ cudaUnbindTexture(sigma_float_tex); }
|
||||
template <> inline void sigma_unbind_texture<double>()
|
||||
{ cudaUnbindTexture(sigma_double_tex); }
|
||||
template <class numtyp> inline textureReference * sigma_get_texture() {
|
||||
const textureReference *ptr;
|
||||
cudaGetTextureReference(&ptr,"sigma_float_tex");
|
||||
return const_cast<textureReference *>(ptr);
|
||||
}
|
||||
template <> inline textureReference * sigma_get_texture<double>() {
|
||||
const textureReference *ptr;
|
||||
cudaGetTextureReference(&ptr,"sigma_double_tex");
|
||||
return const_cast<textureReference *>(ptr);
|
||||
}
|
||||
template <class numtyp>
|
||||
static __inline__ __device__ numtyp _sigma_(const int i, const int j) {
|
||||
return tex2D(sigma_float_tex,j,i);
|
||||
|
@ -173,14 +179,16 @@ static __inline__ __device__ double _sigma_<double>(const int i,const int j) {
|
|||
|
||||
static texture<float, 2, cudaReadModeElementType> epsilon_float_tex;
|
||||
static texture<int2, 2, cudaReadModeElementType> epsilon_double_tex;
|
||||
template <class numtyp> inline void epsilon_bind_texture(NVC_ConstMatT &mat)
|
||||
{ mat.bind_texture(epsilon_float_tex); }
|
||||
template <> inline void epsilon_bind_texture<double>(NVC_ConstMatD &mat)
|
||||
{ mat.bind_texture(epsilon_double_tex); }
|
||||
template <class numtyp> inline void epsilon_unbind_texture()
|
||||
{ cudaUnbindTexture(epsilon_float_tex); }
|
||||
template <> inline void epsilon_unbind_texture<double>()
|
||||
{ cudaUnbindTexture(epsilon_double_tex); }
|
||||
template <class numtyp> inline textureReference * epsilon_get_texture() {
|
||||
const textureReference *ptr;
|
||||
cudaGetTextureReference(&ptr,"epsilon_float_tex");
|
||||
return const_cast<textureReference *>(ptr);
|
||||
}
|
||||
template <> inline textureReference * epsilon_get_texture<double>() {
|
||||
const textureReference *ptr;
|
||||
cudaGetTextureReference(&ptr,"epsilon_double_tex");
|
||||
return const_cast<textureReference *>(ptr);
|
||||
}
|
||||
template <class numtyp>
|
||||
static __inline__ __device__ numtyp _epsilon_(const int i, const int j) {
|
||||
return tex2D(epsilon_float_tex,j,i);
|
||||
|
@ -197,14 +205,16 @@ static __inline__ __device__ double _epsilon_<double>(const int i,const int j) {
|
|||
|
||||
static texture<float, 2, cudaReadModeElementType> cutsq_float_tex;
|
||||
static texture<int2, 2, cudaReadModeElementType> cutsq_double_tex;
|
||||
template <class numtyp> inline void cutsq_bind_texture(NVC_ConstMatT &mat)
|
||||
{ mat.bind_texture(cutsq_float_tex); }
|
||||
template <> inline void cutsq_bind_texture<double>(NVC_ConstMatD &mat)
|
||||
{ mat.bind_texture(cutsq_double_tex); }
|
||||
template <class numtyp> inline void cutsq_unbind_texture()
|
||||
{ cudaUnbindTexture(cutsq_float_tex); }
|
||||
template <> inline void cutsq_unbind_texture<double>()
|
||||
{ cudaUnbindTexture(cutsq_double_tex); }
|
||||
template <class numtyp> inline textureReference * cutsq_get_texture() {
|
||||
const textureReference *ptr;
|
||||
cudaGetTextureReference(&ptr,"cutsq_float_tex");
|
||||
return const_cast<textureReference *>(ptr);
|
||||
}
|
||||
template <> inline textureReference * cutsq_get_texture<double>() {
|
||||
const textureReference *ptr;
|
||||
cudaGetTextureReference(&ptr,"cutsq_double_tex");
|
||||
return const_cast<textureReference *>(ptr);
|
||||
}
|
||||
template <class numtyp>
|
||||
static __inline__ __device__ numtyp _cutsq_(const int i, const int j) {
|
||||
return tex2D(cutsq_float_tex,j,i);
|
||||
|
@ -221,17 +231,19 @@ static __inline__ __device__ double _cutsq_<double>(const int i,const int j) {
|
|||
|
||||
static texture<float2, 2, cudaReadModeElementType> lj1_float_tex;
|
||||
static texture<int4, 2, cudaReadModeElementType> lj1_double_tex;
|
||||
template <class numtyp> inline void lj1_bind_texture(NVC_ConstMatT &mat)
|
||||
{ mat.bind_texture(lj1_float_tex); }
|
||||
template <> inline void lj1_bind_texture<double2>(NVC_ConstMatD2 &mat)
|
||||
{ mat.bind_texture(lj1_double_tex); }
|
||||
template <class numtyp> inline void lj1_unbind_texture()
|
||||
{ cudaUnbindTexture(lj1_float_tex); }
|
||||
template <> inline void lj1_unbind_texture<double2>()
|
||||
{ cudaUnbindTexture(lj1_double_tex); }
|
||||
template <class numtyp> inline textureReference * lj1_get_texture() {
|
||||
const textureReference *ptr;
|
||||
cudaGetTextureReference(&ptr,"lj1_float_tex");
|
||||
return const_cast<textureReference *>(ptr);
|
||||
}
|
||||
template <> inline textureReference * lj1_get_texture<double>() {
|
||||
const textureReference *ptr;
|
||||
cudaGetTextureReference(&ptr,"lj1_double_tex");
|
||||
return const_cast<textureReference *>(ptr);
|
||||
}
|
||||
template <class numtyp>
|
||||
static __inline__ __device__
|
||||
typename cu_vec_traits<numtyp>::vec2 _lj1_(const int i, const int j) {
|
||||
typename nvc_vec_traits<numtyp>::vec2 _lj1_(const int i, const int j) {
|
||||
return tex2D(lj1_float_tex,j,i);
|
||||
}
|
||||
#ifdef GB_GPU_DOUBLE
|
||||
|
@ -249,17 +261,19 @@ static __inline__ __device__ double2 _lj1_<double>(const int i,const int j) {
|
|||
|
||||
static texture<float2, 2, cudaReadModeElementType> lj3_float_tex;
|
||||
static texture<int4, 2, cudaReadModeElementType> lj3_double_tex;
|
||||
template <class numtyp> inline void lj3_bind_texture(NVC_ConstMatT &mat)
|
||||
{ mat.bind_texture(lj3_float_tex); }
|
||||
template <> inline void lj3_bind_texture<double2>(NVC_ConstMatD2 &mat)
|
||||
{ mat.bind_texture(lj3_double_tex); }
|
||||
template <class numtyp> inline void lj3_unbind_texture()
|
||||
{ cudaUnbindTexture(lj3_float_tex); }
|
||||
template <> inline void lj3_unbind_texture<double2>()
|
||||
{ cudaUnbindTexture(lj3_double_tex); }
|
||||
template <class numtyp> inline textureReference * lj3_get_texture() {
|
||||
const textureReference *ptr;
|
||||
cudaGetTextureReference(&ptr,"lj3_float_tex");
|
||||
return const_cast<textureReference *>(ptr);
|
||||
}
|
||||
template <> inline textureReference * lj3_get_texture<double>() {
|
||||
const textureReference *ptr;
|
||||
cudaGetTextureReference(&ptr,"lj3_double_tex");
|
||||
return const_cast<textureReference *>(ptr);
|
||||
}
|
||||
template <class numtyp>
|
||||
static __inline__ __device__
|
||||
typename cu_vec_traits<numtyp>::vec2 _lj3_(const int i, const int j) {
|
||||
typename nvc_vec_traits<numtyp>::vec2 _lj3_(const int i, const int j) {
|
||||
return tex2D(lj3_float_tex,j,i);
|
||||
}
|
||||
#ifdef GB_GPU_DOUBLE
|
||||
|
@ -277,14 +291,16 @@ static __inline__ __device__ double2 _lj3_<double>(const int i,const int j) {
|
|||
|
||||
static texture<float, 2, cudaReadModeElementType> offset_float_tex;
|
||||
static texture<int2, 2, cudaReadModeElementType> offset_double_tex;
|
||||
template <class numtyp> inline void offset_bind_texture(NVC_ConstMatT &mat)
|
||||
{ mat.bind_texture(offset_float_tex); }
|
||||
template <> inline void offset_bind_texture<double>(NVC_ConstMatD &mat)
|
||||
{ mat.bind_texture(offset_double_tex); }
|
||||
template <class numtyp> inline void offset_unbind_texture()
|
||||
{ cudaUnbindTexture(offset_float_tex); }
|
||||
template <> inline void offset_unbind_texture<double>()
|
||||
{ cudaUnbindTexture(offset_double_tex); }
|
||||
template <class numtyp> inline textureReference * offset_get_texture() {
|
||||
const textureReference *ptr;
|
||||
cudaGetTextureReference(&ptr,"offset_float_tex");
|
||||
return const_cast<textureReference *>(ptr);
|
||||
}
|
||||
template <> inline textureReference * offset_get_texture<double>() {
|
||||
const textureReference *ptr;
|
||||
cudaGetTextureReference(&ptr,"offset_double_tex");
|
||||
return const_cast<textureReference *>(ptr);
|
||||
}
|
||||
template <class numtyp>
|
||||
static __inline__ __device__ numtyp _offset_(const int i, const int j) {
|
||||
return tex2D(offset_float_tex,j,i);
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
/***************************************************************************
|
||||
pair_tex_tar.cu
|
||||
-------------------
|
||||
W. Michael Brown
|
||||
|
||||
"Tar" of header and source files that need texture reference definitions
|
||||
within file scope.
|
||||
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS GPU Library
|
||||
__________________________________________________________________________
|
||||
|
||||
begin : Tue Jun 23 2009
|
||||
copyright : (C) 2009 by W. Michael Brown
|
||||
email : wmbrown@sandia.gov
|
||||
***************************************************************************/
|
||||
|
||||
/* -----------------------------------------------------------------------
|
||||
Copyright (2009) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
#include "pair_gpu_atom.cu"
|
||||
#include "lj_gpu.cu"
|
||||
#include "gb_gpu.cu"
|
||||
|
Loading…
Reference in New Issue