forked from lijiext/lammps
Merge 'gpu_hip_port' into master
This commit is contained in:
parent
5eef3b1828
commit
66c5fa2abd
|
@ -0,0 +1,148 @@
|
|||
# /* ----------------------------------------------------------------------
|
||||
# Generic Linux Makefile for HIP
|
||||
# - export HIP_PLATFORM=hcc (or nvcc) before execution
|
||||
# - change HIP_ARCH for your GPU
|
||||
# ------------------------------------------------------------------------- */
|
||||
|
||||
# this setting should match LAMMPS Makefile
|
||||
# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
|
||||
|
||||
LMP_INC = -DLAMMPS_SMALLBIG
|
||||
|
||||
# precision for GPU calculations
|
||||
# -D_SINGLE_SINGLE # Single precision for all calculations
|
||||
# -D_DOUBLE_DOUBLE # Double precision for all calculations
|
||||
# -D_SINGLE_DOUBLE # Accumulation of forces, etc. in double
|
||||
|
||||
HIP_PRECISION = -D_SINGLE_DOUBLE
|
||||
|
||||
HIP_OPTS = -O3
|
||||
HIP_HOST_OPTS = -Wno-deprecated-declarations
|
||||
HIP_HOST_INCLUDE =
|
||||
|
||||
# use device sort
|
||||
# requires linking with hipcc and hipCUB + (rocPRIM or CUB for AMD or Nvidia respectively)
|
||||
HIP_HOST_OPTS += -DUSE_HIP_DEVICE_SORT
|
||||
# path to cub
|
||||
HIP_HOST_INCLUDE += -I./
|
||||
# path to hipcub
|
||||
HIP_HOST_INCLUDE += -I$(HIP_PATH)/../include
|
||||
|
||||
# use mpi
|
||||
HIP_HOST_OPTS += -DMPI_GERYON -DUCL_NO_EXIT
|
||||
# this settings should match LAMMPS Makefile
|
||||
MPI_COMP_OPTS = $(shell mpicxx --showme:compile)
|
||||
MPI_LINK_OPTS = $(shell mpicxx --showme:link)
|
||||
#MPI_COMP_OPTS += -I/usr/include/mpi -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1
|
||||
|
||||
HIP_PATH ?= $(wildcard /opt/rocm/hip)
|
||||
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
|
||||
|
||||
ifeq (hcc,$(HIP_PLATFORM))
|
||||
HIP_OPTS += -ffast-math
|
||||
# possible values: gfx803,gfx900,gfx906
|
||||
HIP_ARCH = gfx906
|
||||
else ifeq (nvcc,$(HIP_PLATFORM))
|
||||
HIP_OPTS += --use_fast_math
|
||||
HIP_ARCH = -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_32,code=[sm_32,compute_32] -gencode arch=compute_35,code=[sm_35,compute_35] \
|
||||
-gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52] -gencode arch=compute_53,code=[sm_53,compute_53]\
|
||||
-gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] -gencode arch=compute_62,code=[sm_62,compute_62]\
|
||||
-gencode arch=compute_70,code=[sm_70,compute_70] -gencode arch=compute_72,code=[sm_72,compute_72] -gencode arch=compute_75,code=[sm_75,compute_75]
|
||||
else
|
||||
$(error Specify HIP platform using 'export HIP_PLATFORM=(hcc,nvcc)')
|
||||
endif
|
||||
|
||||
BIN_DIR = .
|
||||
OBJ_DIR = ./obj
|
||||
LIB_DIR = .
|
||||
AR = ar
|
||||
BSH = /bin/sh
|
||||
|
||||
|
||||
# /* ----------------------------------------------------------------------
|
||||
# don't change section below without need
|
||||
# ------------------------------------------------------------------------- */
|
||||
|
||||
HIP_OPTS += -DUSE_HIP $(HIP_PRECISION)
|
||||
HIP_GPU_OPTS += $(HIP_OPTS) -I./
|
||||
|
||||
ifeq (hcc,$(HIP_PLATFORM))
|
||||
HIP_HOST_OPTS += -fPIC
|
||||
HIP_GPU_CC = $(HIP_PATH)/bin/hipcc --genco
|
||||
HIP_GPU_OPTS_S = -t="$(HIP_ARCH)" -f=\"
|
||||
HIP_GPU_OPTS_E = \"
|
||||
HIP_KERNEL_SUFFIX = .cpp
|
||||
HIP_LIBS_TARGET = export HCC_AMDGPU_TARGET := $(HIP_ARCH)
|
||||
export HCC_AMDGPU_TARGET := $(HIP_ARCH)
|
||||
else ifeq (nvcc,$(HIP_PLATFORM))
|
||||
HIP_GPU_CC = $(HIP_PATH)/bin/hipcc --fatbin
|
||||
HIP_GPU_OPTS += $(HIP_ARCH)
|
||||
HIP_GPU_SORT_ARCH = $(HIP_ARCH)
|
||||
# fix nvcc can't handle -pthread flag
|
||||
MPI_COMP_OPTS := $(subst -pthread,-Xcompiler -pthread,$(MPI_COMP_OPTS))
|
||||
MPI_LINK_OPTS := $(subst -pthread,-Xcompiler -pthread,$(MPI_LINK_OPTS))
|
||||
endif
|
||||
|
||||
# hipcc is essential for device sort, because of hipcub is header only library and ROCm gpu code generation is deferred to the linking stage
|
||||
HIP_HOST_CC = $(HIP_PATH)/bin/hipcc
|
||||
HIP_HOST_OPTS += $(HIP_OPTS) $(MPI_COMP_OPTS) $(LMP_INC)
|
||||
HIP_HOST_CC_CMD = $(HIP_HOST_CC) $(HIP_HOST_OPTS) $(HIP_HOST_INCLUDE)
|
||||
|
||||
# sources
|
||||
|
||||
ALL_H = $(wildcard ./geryon/ucl*.h) $(wildcard ./geryon/hip*.h) $(wildcard ./lal_*.h)
|
||||
SRCS := $(wildcard ./lal_*.cpp)
|
||||
OBJS := $(subst ./,$(OBJ_DIR)/,$(SRCS:%.cpp=%.o))
|
||||
CUS := $(wildcard lal_*.cu)
|
||||
CUHS := $(filter-out pppm_cubin.h, $(CUS:lal_%.cu=%_cubin.h)) pppm_f_cubin.h pppm_d_cubin.h
|
||||
CUHS := $(addprefix $(OBJ_DIR)/, $(CUHS))
|
||||
|
||||
all: $(OBJ_DIR) $(CUHS) $(LIB_DIR)/libgpu.a $(BIN_DIR)/hip_get_devices
|
||||
|
||||
$(OBJ_DIR):
|
||||
mkdir -p $@
|
||||
|
||||
# GPU kernels compilation
|
||||
|
||||
$(OBJ_DIR)/pppm_f_cubin.h: lal_pppm.cu $(ALL_H)
|
||||
@cp $< $(OBJ_DIR)/temp_pppm_f.cu$(HIP_KERNEL_SUFFIX)
|
||||
$(HIP_GPU_CC) $(HIP_GPU_OPTS_S) $(HIP_GPU_OPTS) -Dgrdtyp=float -Dgrdtyp4=float4 $(HIP_GPU_OPTS_E) -o $(OBJ_DIR)/pppm_f.cubin $(OBJ_DIR)/temp_pppm_f.cu$(HIP_KERNEL_SUFFIX)
|
||||
@xxd -i $(OBJ_DIR)/pppm_f.cubin $@
|
||||
@sed -i "s/[a-zA-Z0-9_]*pppm_f_cubin/pppm_f/g" $@
|
||||
@rm $(OBJ_DIR)/temp_pppm_f.cu$(HIP_KERNEL_SUFFIX) $(OBJ_DIR)/pppm_f.cubin
|
||||
|
||||
$(OBJ_DIR)/pppm_d_cubin.h: lal_pppm.cu $(ALL_H)
|
||||
@cp $< $(OBJ_DIR)/temp_pppm_d.cu$(HIP_KERNEL_SUFFIX)
|
||||
$(HIP_GPU_CC) $(HIP_GPU_OPTS_S) $(HIP_GPU_OPTS) -Dgrdtyp=double -Dgrdtyp4=double4 $(HIP_GPU_OPTS_E) -o $(OBJ_DIR)/pppm_d.cubin $(OBJ_DIR)/temp_pppm_d.cu$(HIP_KERNEL_SUFFIX)
|
||||
@xxd -i $(OBJ_DIR)/pppm_d.cubin $@
|
||||
@sed -i "s/[a-zA-Z0-9_]*pppm_d_cubin/pppm_d/g" $@
|
||||
@rm $(OBJ_DIR)/temp_pppm_d.cu$(HIP_KERNEL_SUFFIX) $(OBJ_DIR)/pppm_d.cubin
|
||||
|
||||
$(OBJ_DIR)/%_cubin.h: lal_%.cu $(ALL_H)
|
||||
@cp $< $(OBJ_DIR)/temp_$*.cu$(HIP_KERNEL_SUFFIX)
|
||||
$(HIP_GPU_CC) $(HIP_GPU_OPTS_S) $(HIP_GPU_OPTS) $(HIP_GPU_OPTS_E) -o $(OBJ_DIR)/$*.cubin $(OBJ_DIR)/temp_$*.cu$(HIP_KERNEL_SUFFIX)
|
||||
@xxd -i $(OBJ_DIR)/$*.cubin $@
|
||||
@sed -i "s/[a-zA-Z0-9_]*$*_cubin/$*/g" $@
|
||||
@rm $(OBJ_DIR)/temp_$*.cu$(HIP_KERNEL_SUFFIX) $(OBJ_DIR)/$*.cubin
|
||||
|
||||
# host sources compilation
|
||||
|
||||
$(OBJ_DIR)/lal_atom.o: lal_atom.cpp $(CUHS) $(ALL_H)
|
||||
$(HIP_HOST_CC_CMD) -o $@ -c $< -I$(OBJ_DIR) $(HIP_GPU_SORT_ARCH)
|
||||
|
||||
$(OBJ_DIR)/lal_%.o: lal_%.cpp $(CUHS) $(ALL_H)
|
||||
$(HIP_HOST_CC_CMD) -o $@ -c $< -I$(OBJ_DIR)
|
||||
|
||||
# libgpu building
|
||||
|
||||
$(LIB_DIR)/libgpu.a: $(OBJS)
|
||||
$(AR) -crs $@ $(OBJS)
|
||||
echo "export HIP_PLATFORM := $(HIP_PLATFORM)\n$(HIP_LIBS_TARGET)" > 'Makefile.lammps'
|
||||
|
||||
# test app building
|
||||
|
||||
$(BIN_DIR)/hip_get_devices: ./geryon/ucl_get_devices.cpp $(ALL_H)
|
||||
$(HIP_HOST_CC_CMD) -o $@ $< -DUCL_HIP $(MPI_LINK_OPTS)
|
||||
|
||||
clean:
|
||||
-rm -f $(BIN_DIR)/hip_get_devices $(LIB_DIR)/libgpu.a $(OBJS) $(OBJ_DIR)/temp_* $(CUHS)
|
|
@ -0,0 +1,519 @@
|
|||
/* -----------------------------------------------------------------------
|
||||
Copyright (2009) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the Simplified BSD License.
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
#ifndef HIP_DEVICE
|
||||
#define HIP_DEVICE
|
||||
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <unordered_map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include "hip_macros.h"
|
||||
#include "ucl_types.h"
|
||||
|
||||
namespace ucl_hip {
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// - COMMAND QUEUE STUFF
|
||||
// --------------------------------------------------------------------------
|
||||
typedef hipStream_t command_queue;
|
||||
|
||||
inline void ucl_sync(hipStream_t &stream) {
|
||||
CU_SAFE_CALL(hipStreamSynchronize(stream));
|
||||
}
|
||||
|
||||
struct NVDProperties {
|
||||
int device_id;
|
||||
std::string name;
|
||||
int major;
|
||||
int minor;
|
||||
CUDA_INT_TYPE totalGlobalMem;
|
||||
int multiProcessorCount;
|
||||
|
||||
int maxThreadsPerBlock;
|
||||
int maxThreadsDim[3];
|
||||
int maxGridSize[3];
|
||||
int sharedMemPerBlock;
|
||||
int totalConstantMemory;
|
||||
int SIMDWidth;
|
||||
int memPitch;
|
||||
int regsPerBlock;
|
||||
int clockRate;
|
||||
int textureAlign;
|
||||
|
||||
int kernelExecTimeoutEnabled;
|
||||
int integrated;
|
||||
int canMapHostMemory;
|
||||
int concurrentKernels;
|
||||
int ECCEnabled;
|
||||
int computeMode;
|
||||
};
|
||||
|
||||
/// Class for looking at device properties
|
||||
/** \note Calls to change the device outside of the class results in incorrect
|
||||
* behavior
|
||||
* \note There is no error checking for indexing past the number of devices **/
|
||||
class UCL_Device {
|
||||
public:
|
||||
/// Collect properties for every GPU on the node
|
||||
/** \note You must set the active GPU with set() before using the device **/
|
||||
inline UCL_Device();
|
||||
|
||||
inline ~UCL_Device();
|
||||
|
||||
/// Returns 1 (For compatibility with OpenCL)
|
||||
inline int num_platforms() { return 1; }
|
||||
|
||||
/// Return a string with name and info of the current platform
|
||||
inline std::string platform_name()
|
||||
{ return "HIP platform"; }
|
||||
|
||||
/// Delete any contexts/data and set the platform number to be used
|
||||
inline int set_platform(const int pid);
|
||||
|
||||
/// Return the number of devices that support CUDA
|
||||
inline int num_devices() { return _properties.size(); }
|
||||
|
||||
/// Set the CUDA device to the specified device number
|
||||
/** A context and default command queue will be created for the device
|
||||
* Returns UCL_SUCCESS if successful or UCL_ERROR if the device could not
|
||||
* be allocated for use. clear() is called to delete any contexts and
|
||||
* associated data from previous calls to set(). **/
|
||||
inline int set(int num);
|
||||
|
||||
/// Delete any context and associated data stored from a call to set()
|
||||
inline void clear();
|
||||
|
||||
/// Get the current device number
|
||||
inline int device_num() { return _device; }
|
||||
|
||||
/// Returns the default stream for the current device
|
||||
inline command_queue & cq() { return cq(0); }
|
||||
|
||||
/// Returns the stream indexed by i
|
||||
inline command_queue & cq(const int i) { return _cq[i]; }
|
||||
|
||||
/// Block until all commands in the default stream have completed
|
||||
inline void sync() { sync(0); }
|
||||
|
||||
/// Block until all commands in the specified stream have completed
|
||||
inline void sync(const int i) { ucl_sync(cq(i)); }
|
||||
|
||||
/// Get the number of command queues currently available on device
|
||||
inline int num_queues()
|
||||
{ return _cq.size(); }
|
||||
|
||||
/// Add a stream for device computations
|
||||
inline void push_command_queue() {
|
||||
_cq.push_back(hipStream_t());
|
||||
CU_SAFE_CALL(hipStreamCreateWithFlags(&_cq.back(),0));
|
||||
}
|
||||
|
||||
/// Remove a stream for device computations
|
||||
/** \note You cannot delete the default stream **/
|
||||
inline void pop_command_queue() {
|
||||
if (_cq.size()<2) return;
|
||||
CU_SAFE_CALL_NS(hipStreamDestroy(_cq.back()));
|
||||
_cq.pop_back();
|
||||
}
|
||||
|
||||
/// Set the default command queue (by default this is the null stream)
|
||||
/** \param i index of the command queue (as added by push_command_queue())
|
||||
If i is 0, the default command queue is set to the null stream **/
|
||||
inline void set_command_queue(const int i) {
|
||||
if (i==0) _cq[0]=0;
|
||||
else _cq[0]=_cq[i];
|
||||
}
|
||||
|
||||
/// Get the current CUDA device name
|
||||
inline std::string name() { return name(_device); }
|
||||
/// Get the CUDA device name
|
||||
inline std::string name(const int i)
|
||||
{ return std::string(_properties[i].name); }
|
||||
|
||||
/// Get a string telling the type of the current device
|
||||
inline std::string device_type_name() { return device_type_name(_device); }
|
||||
/// Get a string telling the type of the device
|
||||
inline std::string device_type_name(const int i) { return "GPU"; }
|
||||
|
||||
/// Get current device type (UCL_CPU, UCL_GPU, UCL_ACCELERATOR, UCL_DEFAULT)
|
||||
inline int device_type() { return device_type(_device); }
|
||||
/// Get device type (UCL_CPU, UCL_GPU, UCL_ACCELERATOR, UCL_DEFAULT)
|
||||
inline int device_type(const int i) { return UCL_GPU; }
|
||||
|
||||
/// Returns true if host memory is efficiently addressable from device
|
||||
inline bool shared_memory() { return shared_memory(_device); }
|
||||
/// Returns true if host memory is efficiently addressable from device
|
||||
inline bool shared_memory(const int i) { return device_type(i)==UCL_CPU; }
|
||||
|
||||
/// Returns true if double precision is support for the current device
|
||||
inline bool double_precision() { return double_precision(_device); }
|
||||
/// Returns true if double precision is support for the device
|
||||
inline bool double_precision(const int i) {return arch(i)>=1.3;}
|
||||
|
||||
/// Get the number of compute units on the current device
|
||||
inline unsigned cus() { return cus(_device); }
|
||||
/// Get the number of compute units
|
||||
inline unsigned cus(const int i)
|
||||
{ return _properties[i].multiProcessorCount; }
|
||||
|
||||
/// Get the number of cores in the current device
|
||||
inline unsigned cores() { return cores(_device); }
|
||||
/// Get the number of cores
|
||||
inline unsigned cores(const int i)
|
||||
{ if (arch(i)<2.0) return _properties[i].multiProcessorCount*8;
|
||||
else if (arch(i)<2.1) return _properties[i].multiProcessorCount*32;
|
||||
else if (arch(i)<3.0) return _properties[i].multiProcessorCount*48;
|
||||
else return _properties[i].multiProcessorCount*192; }
|
||||
|
||||
/// Get the gigabytes of global memory in the current device
|
||||
inline double gigabytes() { return gigabytes(_device); }
|
||||
/// Get the gigabytes of global memory
|
||||
inline double gigabytes(const int i)
|
||||
{ return static_cast<double>(_properties[i].totalGlobalMem)/1073741824; }
|
||||
|
||||
/// Get the bytes of global memory in the current device
|
||||
inline size_t bytes() { return bytes(_device); }
|
||||
/// Get the bytes of global memory
|
||||
inline size_t bytes(const int i) { return _properties[i].totalGlobalMem; }
|
||||
|
||||
// Get the gigabytes of free memory in the current device
|
||||
inline double free_gigabytes() { return free_gigabytes(_device); }
|
||||
// Get the gigabytes of free memory
|
||||
inline double free_gigabytes(const int i)
|
||||
{ return static_cast<double>(free_bytes(i))/1073741824; }
|
||||
|
||||
// Get the bytes of free memory in the current device
|
||||
inline size_t free_bytes() { return free_bytes(_device); }
|
||||
// Get the bytes of free memory
|
||||
inline size_t free_bytes(const int i) {
|
||||
CUDA_INT_TYPE dfree, dtotal;
|
||||
CU_SAFE_CALL_NS(hipMemGetInfo(&dfree, &dtotal));
|
||||
return static_cast<size_t>(dfree);
|
||||
}
|
||||
|
||||
/// Return the GPGPU compute capability for current device
|
||||
inline double arch() { return arch(_device); }
|
||||
/// Return the GPGPU compute capability
|
||||
inline double arch(const int i)
|
||||
{ return static_cast<double>(_properties[i].minor)/10+_properties[i].major;}
|
||||
|
||||
/// Clock rate in GHz for current device
|
||||
inline double clock_rate() { return clock_rate(_device); }
|
||||
/// Clock rate in GHz
|
||||
inline double clock_rate(const int i)
|
||||
{ return _properties[i].clockRate*1e-6;}
|
||||
|
||||
/// Get the maximum number of threads per block
|
||||
inline size_t group_size() { return group_size(_device); }
|
||||
/// Get the maximum number of threads per block
|
||||
inline size_t group_size(const int i)
|
||||
{ return _properties[i].maxThreadsPerBlock; }
|
||||
|
||||
/// Return the maximum memory pitch in bytes for current device
|
||||
inline size_t max_pitch() { return max_pitch(_device); }
|
||||
/// Return the maximum memory pitch in bytes
|
||||
inline size_t max_pitch(const int i) { return _properties[i].memPitch; }
|
||||
|
||||
/// Returns false if accelerator cannot be shared by multiple processes
|
||||
/** If it cannot be determined, true is returned **/
|
||||
inline bool sharing_supported() { return sharing_supported(_device); }
|
||||
/// Returns false if accelerator cannot be shared by multiple processes
|
||||
/** If it cannot be determined, true is returned **/
|
||||
inline bool sharing_supported(const int i)
|
||||
{ return (_properties[i].computeMode == hipComputeModeDefault); }
|
||||
|
||||
/// True if splitting device into equal subdevices supported
|
||||
inline bool fission_equal()
|
||||
{ return fission_equal(_device); }
|
||||
/// True if splitting device into equal subdevices supported
|
||||
inline bool fission_equal(const int i)
|
||||
{ return false; }
|
||||
/// True if splitting device into subdevices by specified counts supported
|
||||
inline bool fission_by_counts()
|
||||
{ return fission_by_counts(_device); }
|
||||
/// True if splitting device into subdevices by specified counts supported
|
||||
inline bool fission_by_counts(const int i)
|
||||
{ return false; }
|
||||
/// True if splitting device into subdevices by affinity domains supported
|
||||
inline bool fission_by_affinity()
|
||||
{ return fission_by_affinity(_device); }
|
||||
/// True if splitting device into subdevices by affinity domains supported
|
||||
inline bool fission_by_affinity(const int i)
|
||||
{ return false; }
|
||||
|
||||
/// Maximum number of subdevices allowed from device fission
|
||||
inline int max_sub_devices()
|
||||
{ return max_sub_devices(_device); }
|
||||
/// Maximum number of subdevices allowed from device fission
|
||||
inline int max_sub_devices(const int i)
|
||||
{ return 0; }
|
||||
|
||||
/// List all devices along with all properties
|
||||
inline void print_all(std::ostream &out);
|
||||
|
||||
/// Select the platform that has accelerators (for compatibility with OpenCL)
|
||||
inline int set_platform_accelerator(int pid=-1) { return UCL_SUCCESS; }
|
||||
|
||||
inline int load_module(const void* program, hipModule_t& module, std::string *log=NULL){
|
||||
auto it = _loaded_modules.emplace(program, hipModule_t());
|
||||
if(!it.second){
|
||||
module = it.first->second;
|
||||
return UCL_SUCCESS;
|
||||
}
|
||||
const unsigned int num_opts=2;
|
||||
hipJitOption options[num_opts];
|
||||
void *values[num_opts];
|
||||
|
||||
// set up size of compilation log buffer
|
||||
options[0] = hipJitOptionInfoLogBufferSizeBytes;
|
||||
values[0] = (void *)(int)10240;
|
||||
// set up pointer to the compilation log buffer
|
||||
options[1] = hipJitOptionInfoLogBuffer;
|
||||
char clog[10240] = { 0 };
|
||||
values[1] = clog;
|
||||
|
||||
hipError_t err=hipModuleLoadDataEx(&module,program,num_opts, options,(void **)values);
|
||||
|
||||
if (log!=NULL)
|
||||
*log=std::string(clog);
|
||||
|
||||
if (err != hipSuccess) {
|
||||
#ifndef UCL_NO_EXIT
|
||||
std::cerr << std::endl
|
||||
<< "----------------------------------------------------------\n"
|
||||
<< " UCL Error: Error compiling PTX Program...\n"
|
||||
<< "----------------------------------------------------------\n";
|
||||
std::cerr << log << std::endl;
|
||||
#endif
|
||||
_loaded_modules.erase(it.first);
|
||||
return UCL_COMPILE_ERROR;
|
||||
}
|
||||
it.first->second = module;
|
||||
return UCL_SUCCESS;
|
||||
}
|
||||
private:
|
||||
std::unordered_map<const void*, hipModule_t> _loaded_modules;
|
||||
int _device, _num_devices;
|
||||
std::vector<NVDProperties> _properties;
|
||||
std::vector<hipStream_t> _cq;
|
||||
hipDevice_t _cu_device;
|
||||
};
|
||||
|
||||
// Grabs the properties for all devices
|
||||
UCL_Device::UCL_Device() {
|
||||
CU_SAFE_CALL_NS(hipInit(0));
|
||||
CU_SAFE_CALL_NS(hipGetDeviceCount(&_num_devices));
|
||||
for (int i=0; i<_num_devices; ++i) {
|
||||
hipDevice_t dev;
|
||||
CU_SAFE_CALL_NS(hipDeviceGet(&dev,i));
|
||||
int major, minor;
|
||||
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, dev));
|
||||
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, dev));
|
||||
if (major==9999)
|
||||
continue;
|
||||
|
||||
NVDProperties prop;
|
||||
prop.device_id = i;
|
||||
prop.major=major;
|
||||
prop.minor=minor;
|
||||
|
||||
char namecstr[1024];
|
||||
CU_SAFE_CALL_NS(hipDeviceGetName(namecstr,1024,dev));
|
||||
prop.name=namecstr;
|
||||
|
||||
CU_SAFE_CALL_NS(hipDeviceTotalMem(&prop.totalGlobalMem,dev));
|
||||
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.multiProcessorCount, hipDeviceAttributeMultiprocessorCount, dev));
|
||||
|
||||
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsPerBlock, hipDeviceAttributeMaxThreadsPerBlock, dev));
|
||||
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsDim[0], hipDeviceAttributeMaxBlockDimX, dev));
|
||||
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsDim[1], hipDeviceAttributeMaxBlockDimY, dev));
|
||||
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsDim[2], hipDeviceAttributeMaxBlockDimZ, dev));
|
||||
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxGridSize[0], hipDeviceAttributeMaxGridDimX, dev));
|
||||
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxGridSize[1], hipDeviceAttributeMaxGridDimY, dev));
|
||||
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxGridSize[2], hipDeviceAttributeMaxGridDimZ, dev));
|
||||
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.sharedMemPerBlock, hipDeviceAttributeMaxSharedMemoryPerBlock, dev));
|
||||
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.totalConstantMemory, hipDeviceAttributeTotalConstantMemory, dev));
|
||||
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.SIMDWidth, hipDeviceAttributeWarpSize, dev));
|
||||
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.memPitch, CU_DEVICE_ATTRIBUTE_MAX_PITCH, dev));
|
||||
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.regsPerBlock, hipDeviceAttributeMaxRegistersPerBlock, dev));
|
||||
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.clockRate, hipDeviceAttributeClockRate, dev));
|
||||
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.textureAlign, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, dev));
|
||||
|
||||
//#if CUDA_VERSION >= 2020
|
||||
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.kernelExecTimeoutEnabled, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,dev));
|
||||
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.integrated, hipDeviceAttributeIntegrated, dev));
|
||||
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.canMapHostMemory, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev));
|
||||
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.computeMode, hipDeviceAttributeComputeMode,dev));
|
||||
//#endif
|
||||
//#if CUDA_VERSION >= 3010
|
||||
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.concurrentKernels, hipDeviceAttributeConcurrentKernels, dev));
|
||||
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.ECCEnabled, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, dev));
|
||||
//#endif
|
||||
|
||||
_properties.push_back(prop);
|
||||
}
|
||||
_device=-1;
|
||||
_cq.push_back(hipStream_t());
|
||||
_cq.back()=0;
|
||||
}
|
||||
|
||||
UCL_Device::~UCL_Device() {
|
||||
clear();
|
||||
}
|
||||
|
||||
int UCL_Device::set_platform(const int pid) {
|
||||
clear();
|
||||
#ifdef UCL_DEBUG
|
||||
assert(pid<num_platforms());
|
||||
#endif
|
||||
return UCL_SUCCESS;
|
||||
}
|
||||
|
||||
// Set the CUDA device to the specified device number
|
||||
int UCL_Device::set(int num) {
|
||||
clear();
|
||||
_device=_properties[num].device_id;
|
||||
hipError_t err=hipDeviceGet(&_cu_device,_device);
|
||||
if (err!=hipSuccess) {
|
||||
#ifndef UCL_NO_EXIT
|
||||
std::cerr << "UCL Error: Could not access accelerator number " << num
|
||||
<< " for use.\n";
|
||||
UCL_GERYON_EXIT;
|
||||
#endif
|
||||
return UCL_ERROR;
|
||||
}
|
||||
//hipError_t err=hipCtxCreate(&_context,0,_cu_device); deprecated and unnecessary
|
||||
err=hipSetDevice(_device);
|
||||
if (err!=hipSuccess) {
|
||||
#ifndef UCL_NO_EXIT
|
||||
std::cerr << "UCL Error: Could not set accelerator number " << num
|
||||
<< " for use.\n";
|
||||
UCL_GERYON_EXIT;
|
||||
#endif
|
||||
return UCL_ERROR;
|
||||
}
|
||||
return UCL_SUCCESS;
|
||||
}
|
||||
|
||||
void UCL_Device::clear() {
|
||||
if (_device>-1) {
|
||||
for (int i=1; i<num_queues(); i++) pop_command_queue();
|
||||
CU_SAFE_CALL_NS(hipDeviceReset());
|
||||
}
|
||||
_device=-1;
|
||||
}
|
||||
|
||||
// List all devices along with all properties
|
||||
void UCL_Device::print_all(std::ostream &out) {
|
||||
//#if CUDA_VERSION >= 2020
|
||||
int driver_version;
|
||||
hipDriverGetVersion(&driver_version);
|
||||
out << "Driver Version: "
|
||||
<< driver_version/1000 << "." << driver_version%100
|
||||
<< std::endl;
|
||||
//#endif
|
||||
|
||||
if (num_devices() == 0)
|
||||
out << "There is no device supporting HIP\n";
|
||||
for (int i=0; i<num_devices(); ++i) {
|
||||
out << "\nDevice " << i << ": \"" << name(i) << "\"\n";
|
||||
out << " Type of device: "
|
||||
<< device_type_name(i).c_str() << std::endl;
|
||||
out << " Compute capability: "
|
||||
<< arch(i) << std::endl;
|
||||
out << " Double precision support: ";
|
||||
if (double_precision(i))
|
||||
out << "Yes\n";
|
||||
else
|
||||
out << "No\n";
|
||||
out << " Total amount of global memory: "
|
||||
<< gigabytes(i) << " GB\n";
|
||||
//#if CUDA_VERSION >= 2000
|
||||
out << " Number of compute units/multiprocessors: "
|
||||
<< _properties[i].multiProcessorCount << std::endl;
|
||||
out << " Number of cores: "
|
||||
<< cores(i) << std::endl;
|
||||
//#endif
|
||||
out << " Total amount of constant memory: "
|
||||
<< _properties[i].totalConstantMemory << " bytes\n";
|
||||
out << " Total amount of local/shared memory per block: "
|
||||
<< _properties[i].sharedMemPerBlock << " bytes\n";
|
||||
out << " Total number of registers available per block: "
|
||||
<< _properties[i].regsPerBlock << std::endl;
|
||||
out << " Warp size: "
|
||||
<< _properties[i].SIMDWidth << std::endl;
|
||||
out << " Maximum number of threads per block: "
|
||||
<< _properties[i].maxThreadsPerBlock << std::endl;
|
||||
out << " Maximum group size (# of threads per block) "
|
||||
<< _properties[i].maxThreadsDim[0] << " x "
|
||||
<< _properties[i].maxThreadsDim[1] << " x "
|
||||
<< _properties[i].maxThreadsDim[2] << std::endl;
|
||||
out << " Maximum item sizes (# threads for each dim) "
|
||||
<< _properties[i].maxGridSize[0] << " x "
|
||||
<< _properties[i].maxGridSize[1] << " x "
|
||||
<< _properties[i].maxGridSize[2] << std::endl;
|
||||
//out << " Maximum memory pitch: "
|
||||
// << max_pitch(i) << " bytes\n";
|
||||
//out << " Texture alignment: "
|
||||
// << _properties[i].textureAlign << " bytes\n";
|
||||
out << " Clock rate: "
|
||||
<< clock_rate(i) << " GHz\n";
|
||||
//#if CUDA_VERSION >= 2020
|
||||
//out << " Run time limit on kernels: ";
|
||||
//if (_properties[i].kernelExecTimeoutEnabled)
|
||||
// out << "Yes\n";
|
||||
//else
|
||||
// out << "No\n";
|
||||
out << " Integrated: ";
|
||||
if (_properties[i].integrated)
|
||||
out << "Yes\n";
|
||||
else
|
||||
out << "No\n";
|
||||
//out << " Support host page-locked memory mapping: ";
|
||||
//if (_properties[i].canMapHostMemory)
|
||||
// out << "Yes\n";
|
||||
//else
|
||||
// out << "No\n";
|
||||
out << " Compute mode: ";
|
||||
if (_properties[i].computeMode == hipComputeModeDefault)
|
||||
out << "Default\n"; // multiple threads can use device
|
||||
//#if CUDA_VERSION >= 8000
|
||||
// else if (_properties[i].computeMode == hipComputeModeExclusiveProcess)
|
||||
//#else
|
||||
else if (_properties[i].computeMode == hipComputeModeExclusive)
|
||||
//#endif
|
||||
out << "Exclusive\n"; // only thread can use device
|
||||
else if (_properties[i].computeMode == hipComputeModeProhibited)
|
||||
out << "Prohibited\n"; // no thread can use device
|
||||
//#if CUDART_VERSION >= 4000
|
||||
else if (_properties[i].computeMode == hipComputeModeExclusiveProcess)
|
||||
out << "Exclusive Process\n"; // multiple threads 1 process
|
||||
//#endif
|
||||
else
|
||||
out << "Unknown\n";
|
||||
//#endif
|
||||
//#if CUDA_VERSION >= 3010
|
||||
out << " Concurrent kernel execution: ";
|
||||
if (_properties[i].concurrentKernels)
|
||||
out << "Yes\n";
|
||||
else
|
||||
out << "No\n";
|
||||
//out << " Device has ECC support enabled: ";
|
||||
//if (_properties[i].ECCEnabled)
|
||||
// out << "Yes\n";
|
||||
//else
|
||||
// out << "No\n";
|
||||
//#endif
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,298 @@
|
|||
/* -----------------------------------------------------------------------
|
||||
Copyright (2010) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the Simplified BSD License.
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
#ifndef HIP_KERNEL
|
||||
#define HIP_KERNEL
|
||||
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "hip_device.h"
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
namespace ucl_hip {
|
||||
|
||||
class UCL_Texture;
|
||||
template <class numtyp> class UCL_D_Vec;
|
||||
template <class numtyp> class UCL_D_Mat;
|
||||
template <class hosttype, class devtype> class UCL_Vector;
|
||||
template <class hosttype, class devtype> class UCL_Matrix;
|
||||
#define UCL_MAX_KERNEL_ARGS 256
|
||||
|
||||
/// Class storing 1 or more kernel functions from a single string or file
|
||||
class UCL_Program {
|
||||
UCL_Device* _device_ptr;
|
||||
public:
|
||||
inline UCL_Program(UCL_Device &device) { _device_ptr = &device; _cq=device.cq(); }
|
||||
inline UCL_Program(UCL_Device &device, const void *program,
|
||||
const char *flags="", std::string *log=NULL) {
|
||||
_device_ptr = &device; _cq=device.cq();
|
||||
init(device);
|
||||
load_string(program,flags,log);
|
||||
}
|
||||
|
||||
inline ~UCL_Program() {}
|
||||
|
||||
/// Initialize the program with a device
|
||||
inline void init(UCL_Device &device) { _device_ptr = &device; _cq=device.cq(); }
|
||||
|
||||
/// Clear any data associated with program
|
||||
/** \note Must call init() after each clear **/
|
||||
inline void clear() { }
|
||||
|
||||
/// Load a program from a file and compile with flags
|
||||
inline int load(const char *filename, const char *flags="", std::string *log=NULL) {
|
||||
std::ifstream in(filename);
|
||||
if (!in || in.is_open()==false) {
|
||||
#ifndef UCL_NO_EXIT
|
||||
std::cerr << "UCL Error: Could not open kernel file: "
|
||||
<< filename << std::endl;
|
||||
UCL_GERYON_EXIT;
|
||||
#endif
|
||||
return UCL_FILE_NOT_FOUND;
|
||||
}
|
||||
|
||||
std::string program((std::istreambuf_iterator<char>(in)),
|
||||
std::istreambuf_iterator<char>());
|
||||
in.close();
|
||||
return load_string(program.c_str(),flags,log);
|
||||
}
|
||||
|
||||
/// Load a program from a string and compile with flags
|
||||
inline int load_string(const void *program, const char *flags="", std::string *log=NULL) {
|
||||
return _device_ptr->load_module(program, _module, log);
|
||||
}
|
||||
|
||||
friend class UCL_Kernel;
|
||||
private:
|
||||
hipModule_t _module;
|
||||
hipStream_t _cq;
|
||||
friend class UCL_Texture;
|
||||
};
|
||||
|
||||
/// Class for dealing with CUDA Driver kernels
|
||||
class UCL_Kernel {
|
||||
public:
|
||||
UCL_Kernel() : _dimensions(1), _num_args(0) {
|
||||
_num_blocks[0]=0;
|
||||
}
|
||||
|
||||
UCL_Kernel(UCL_Program &program, const char *function) :
|
||||
_dimensions(1), _num_args(0) {
|
||||
_num_blocks[0]=0;
|
||||
set_function(program,function);
|
||||
_cq=program._cq;
|
||||
}
|
||||
|
||||
~UCL_Kernel() {}
|
||||
|
||||
/// Clear any function associated with the kernel
|
||||
inline void clear() { }
|
||||
|
||||
/// Get the kernel function from a program
|
||||
/** \ret UCL_ERROR_FLAG (UCL_SUCCESS, UCL_FILE_NOT_FOUND, UCL_ERROR) **/
|
||||
inline int set_function(UCL_Program &program, const char *function) {
|
||||
hipError_t err=hipModuleGetFunction(&_kernel,program._module,function);
|
||||
if (err!=hipSuccess) {
|
||||
#ifndef UCL_NO_EXIT
|
||||
std::cerr << "UCL Error: Could not find function: " << function
|
||||
<< " in program.\n";
|
||||
UCL_GERYON_EXIT;
|
||||
#endif
|
||||
return UCL_FUNCTION_NOT_FOUND;
|
||||
}
|
||||
_cq=program._cq;
|
||||
return UCL_SUCCESS;
|
||||
}
|
||||
|
||||
/// Set the kernel argument.
|
||||
/** If not a device pointer, this must be repeated each time the argument
|
||||
* changes
|
||||
* \note To set kernel parameter i (i>0), parameter i-1 must be set **/
|
||||
template <class dtype>
|
||||
inline void set_arg(const unsigned index, const dtype * const arg) {
|
||||
if (index==_num_args)
|
||||
add_arg(arg);
|
||||
else if (index<_num_args){
|
||||
assert(0==1); // not implemented
|
||||
}
|
||||
else
|
||||
assert(0==1); // Must add kernel parameters in sequential order
|
||||
}
|
||||
|
||||
/// Set a geryon container as a kernel argument.
|
||||
template <class numtyp>
|
||||
inline void set_arg(const UCL_D_Vec<numtyp> * const arg)
|
||||
{ set_arg(&arg->begin()); }
|
||||
|
||||
/// Set a geryon container as a kernel argument.
|
||||
template <class numtyp>
|
||||
inline void set_arg(const UCL_D_Mat<numtyp> * const arg)
|
||||
{ set_arg(&arg->begin()); }
|
||||
|
||||
/// Set a geryon container as a kernel argument.
|
||||
template <class hosttype, class devtype>
|
||||
inline void set_arg(const UCL_Vector<hosttype, devtype> * const arg)
|
||||
{ set_arg(&arg->device.begin()); }
|
||||
|
||||
/// Set a geryon container as a kernel argument.
|
||||
template <class hosttype, class devtype>
|
||||
inline void set_arg(const UCL_Matrix<hosttype, devtype> * const arg)
|
||||
{ set_arg(&arg->device.begin()); }
|
||||
|
||||
/// Add a kernel argument.
|
||||
inline void add_arg(const hipDeviceptr_t* const arg) {
|
||||
add_arg<void*>((void**)arg);
|
||||
}
|
||||
|
||||
/// Add a kernel argument.
|
||||
template <class dtype>
|
||||
inline void add_arg(const dtype* const arg) {
|
||||
const auto old_size = _hip_kernel_args.size();
|
||||
const auto aligned_size = (old_size+alignof(dtype)-1) & ~(alignof(dtype)-1);
|
||||
const auto arg_size = sizeof(dtype);
|
||||
_hip_kernel_args.resize(aligned_size + arg_size);
|
||||
*((dtype*)(&_hip_kernel_args[aligned_size])) = *arg;
|
||||
_num_args++;
|
||||
if (_num_args>UCL_MAX_KERNEL_ARGS) assert(0==1);
|
||||
}
|
||||
|
||||
/// Add a geryon container as a kernel argument.
|
||||
template <class numtyp>
|
||||
inline void add_arg(const UCL_D_Vec<numtyp> * const arg)
|
||||
{ add_arg(&arg->begin()); }
|
||||
|
||||
/// Add a geryon container as a kernel argument.
|
||||
template <class numtyp>
|
||||
inline void add_arg(const UCL_D_Mat<numtyp> * const arg)
|
||||
{ add_arg(&arg->begin()); }
|
||||
|
||||
/// Add a geryon container as a kernel argument.
|
||||
template <class hosttype, class devtype>
|
||||
inline void add_arg(const UCL_Vector<hosttype, devtype> * const arg)
|
||||
{ add_arg(&arg->device.begin()); }
|
||||
|
||||
/// Add a geryon container as a kernel argument.
|
||||
template <class hosttype, class devtype>
|
||||
inline void add_arg(const UCL_Matrix<hosttype, devtype> * const arg)
|
||||
{ add_arg(&arg->device.begin()); }
|
||||
|
||||
/// Set the number of thread blocks and the number of threads in each block
|
||||
/** \note This should be called before any arguments have been added
|
||||
\note The default command queue is used for the kernel execution **/
|
||||
inline void set_size(const size_t num_blocks, const size_t block_size) {
|
||||
_dimensions=1;
|
||||
_num_blocks[0]=num_blocks;
|
||||
_num_blocks[1]=1;
|
||||
_num_blocks[2]=1;
|
||||
|
||||
_block_size[0]=block_size;
|
||||
_block_size[1]=1;
|
||||
_block_size[2]=1;
|
||||
}
|
||||
|
||||
/// Set the number of thread blocks and the number of threads in each block
|
||||
/** \note This should be called before any arguments have been added
|
||||
\note The default command queue for the kernel is changed to cq **/
|
||||
inline void set_size(const size_t num_blocks, const size_t block_size,
|
||||
command_queue &cq)
|
||||
{ _cq=cq; set_size(num_blocks,block_size); }
|
||||
|
||||
/// Set the number of thread blocks and the number of threads in each block
|
||||
/** \note This should be called before any arguments have been added
|
||||
\note The default command queue is used for the kernel execution **/
|
||||
inline void set_size(const size_t num_blocks_x, const size_t num_blocks_y,
|
||||
const size_t block_size_x, const size_t block_size_y) {
|
||||
_dimensions=2;
|
||||
_num_blocks[0]=num_blocks_x;
|
||||
_num_blocks[1]=num_blocks_y;
|
||||
_num_blocks[2]=1;
|
||||
|
||||
_block_size[0]=block_size_x;
|
||||
_block_size[1]=block_size_y;
|
||||
_block_size[2]=1;
|
||||
}
|
||||
|
||||
/// Set the number of thread blocks and the number of threads in each block
|
||||
/** \note This should be called before any arguments have been added
|
||||
\note The default command queue for the kernel is changed to cq **/
|
||||
inline void set_size(const size_t num_blocks_x, const size_t num_blocks_y,
|
||||
const size_t block_size_x, const size_t block_size_y,
|
||||
command_queue &cq)
|
||||
{_cq=cq; set_size(num_blocks_x, num_blocks_y, block_size_x, block_size_y);}
|
||||
|
||||
/// Set the number of thread blocks and the number of threads in each block
|
||||
/** \note This should be called before any arguments have been added
|
||||
\note The default command queue is used for the kernel execution **/
|
||||
inline void set_size(const size_t num_blocks_x, const size_t num_blocks_y,
|
||||
const size_t block_size_x,
|
||||
const size_t block_size_y, const size_t block_size_z) {
|
||||
_dimensions=2;
|
||||
_num_blocks[0]=num_blocks_x;
|
||||
_num_blocks[1]=num_blocks_y;
|
||||
_num_blocks[2]=1;
|
||||
|
||||
_block_size[0]=block_size_x;
|
||||
_block_size[1]=block_size_y;
|
||||
_block_size[2]=block_size_z;
|
||||
}
|
||||
|
||||
/// Set the number of thread blocks and the number of threads in each block
|
||||
/** \note This should be called before any arguments have been added
|
||||
\note The default command queue is used for the kernel execution **/
|
||||
inline void set_size(const size_t num_blocks_x, const size_t num_blocks_y,
|
||||
const size_t block_size_x, const size_t block_size_y,
|
||||
const size_t block_size_z, command_queue &cq) {
|
||||
_cq=cq;
|
||||
set_size(num_blocks_x, num_blocks_y, block_size_x, block_size_y,
|
||||
block_size_z);
|
||||
}
|
||||
|
||||
/// Run the kernel in the default command queue
|
||||
inline void run() {
|
||||
size_t args_size = _hip_kernel_args.size();
|
||||
void *config[] = {
|
||||
HIP_LAUNCH_PARAM_BUFFER_POINTER, (void*)_hip_kernel_args.data(),
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &args_size,
|
||||
HIP_LAUNCH_PARAM_END
|
||||
};
|
||||
const auto res = hipModuleLaunchKernel(_kernel,_num_blocks[0],_num_blocks[1],
|
||||
_num_blocks[2],_block_size[0],_block_size[1],
|
||||
_block_size[2],0,_cq, NULL, config);
|
||||
CU_SAFE_CALL(res);
|
||||
//#endif
|
||||
}
|
||||
|
||||
/// Clear any arguments associated with the kernel
|
||||
inline void clear_args() {
|
||||
_num_args=0;
|
||||
_hip_kernel_args.clear();
|
||||
}
|
||||
|
||||
/// Return the default command queue/stream associated with this data
|
||||
inline command_queue & cq() { return _cq; }
|
||||
/// Change the default command queue associated with matrix
|
||||
inline void cq(command_queue &cq_in) { _cq=cq_in; }
|
||||
#include "ucl_arg_kludge.h"
|
||||
|
||||
private:
|
||||
hipFunction_t _kernel;
|
||||
hipStream_t _cq;
|
||||
unsigned _dimensions;
|
||||
unsigned _num_blocks[3];
|
||||
unsigned _num_args;
|
||||
friend class UCL_Texture;
|
||||
|
||||
unsigned _block_size[3];
|
||||
std::vector<char> _hip_kernel_args;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,83 @@
|
|||
#ifndef HIP_MACROS_H
|
||||
#define HIP_MACROS_H
|
||||
|
||||
#include <cstdio>
|
||||
#include <cassert>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
//#if CUDA_VERSION >= 3020
|
||||
#define CUDA_INT_TYPE size_t
|
||||
//#else
|
||||
//#define CUDA_INT_TYPE unsigned
|
||||
//#endif
|
||||
|
||||
#ifdef MPI_GERYON
|
||||
#include "mpi.h"
|
||||
#define NVD_GERYON_EXIT do { \
|
||||
int is_final; \
|
||||
MPI_Finalized(&is_final); \
|
||||
if (!is_final) \
|
||||
MPI_Abort(MPI_COMM_WORLD,-1); \
|
||||
} while(0)
|
||||
#else
|
||||
#define NVD_GERYON_EXIT assert(0==1)
|
||||
#endif
|
||||
|
||||
#ifndef UCL_GERYON_EXIT
|
||||
#define UCL_GERYON_EXIT NVD_GERYON_EXIT
|
||||
#endif
|
||||
|
||||
#ifdef UCL_DEBUG
|
||||
#define UCL_SYNC_DEBUG
|
||||
#define UCL_DESTRUCT_CHECK
|
||||
#endif
|
||||
|
||||
#ifndef UCL_NO_API_CHECK
|
||||
|
||||
#define CU_SAFE_CALL_NS( call ) do { \
|
||||
hipError_t err = call; \
|
||||
if( hipSuccess != err) { \
|
||||
fprintf(stderr, "HIP runtime error %d in call at file '%s' in line %i.\n", \
|
||||
err, __FILE__, __LINE__ ); \
|
||||
NVD_GERYON_EXIT; \
|
||||
} } while (0)
|
||||
|
||||
#ifdef UCL_SYNC_DEBUG
|
||||
|
||||
#define CU_SAFE_CALL( call ) do { \
|
||||
CU_SAFE_CALL_NS( call ); \
|
||||
hipError_t err=hipCtxSynchronize(); \
|
||||
if( hipSuccess != err) { \
|
||||
fprintf(stderr, "HIP runtime error %d in file '%s' in line %i.\n", \
|
||||
err, __FILE__, __LINE__ ); \
|
||||
NVD_GERYON_EXIT; \
|
||||
} } while (0)
|
||||
|
||||
#else
|
||||
|
||||
#define CU_SAFE_CALL( call ) CU_SAFE_CALL_NS( call )
|
||||
|
||||
#endif
|
||||
|
||||
#else // not DEBUG
|
||||
|
||||
// void macros for performance reasons
|
||||
#define CU_SAFE_CALL_NS( call ) call
|
||||
#define CU_SAFE_CALL( call) call
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef UCL_DESTRUCT_CHECK
|
||||
|
||||
#define CU_DESTRUCT_CALL( call) CU_SAFE_CALL( call)
|
||||
#define CU_DESTRUCT_CALL_NS( call) CU_SAFE_CALL_NS( call)
|
||||
|
||||
#else
|
||||
|
||||
#define CU_DESTRUCT_CALL( call) call
|
||||
#define CU_DESTRUCT_CALL_NS( call) call
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
/* -----------------------------------------------------------------------
|
||||
Copyright (2010) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the Simplified BSD License.
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
/*! \file */
|
||||
|
||||
#ifndef HIP_MAT_H
|
||||
#define HIP_MAT_H
|
||||
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "hip_memory.h"
|
||||
|
||||
/// Namespace for CUDA Driver routines
|
||||
namespace ucl_hip {
|
||||
|
||||
#define _UCL_MAT_ALLOW
|
||||
#define _UCL_DEVICE_PTR_MAT
|
||||
#include "ucl_basemat.h"
|
||||
#include "ucl_h_vec.h"
|
||||
#include "ucl_h_mat.h"
|
||||
#include "ucl_d_vec.h"
|
||||
#include "ucl_d_mat.h"
|
||||
#include "ucl_s_obj_help.h"
|
||||
#include "ucl_vector.h"
|
||||
#include "ucl_matrix.h"
|
||||
#undef _UCL_DEVICE_PTR_MAT
|
||||
#undef _UCL_MAT_ALLOW
|
||||
|
||||
#define UCL_COPY_ALLOW
|
||||
#include "ucl_copy.h"
|
||||
#undef UCL_COPY_ALLOW
|
||||
|
||||
#define UCL_PRINT_ALLOW
|
||||
#include "ucl_print.h"
|
||||
#undef UCL_PRINT_ALLOW
|
||||
|
||||
} // namespace ucl_cudadr
|
||||
|
||||
#endif
|
|
@ -0,0 +1,279 @@
|
|||
/* -----------------------------------------------------------------------
|
||||
Copyright (2010) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the Simplified BSD License.
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
#ifndef HIP_MEMORY_H
|
||||
#define HIP_MEMORY_H
|
||||
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include "hip_macros.h"
|
||||
#include "hip_device.h"
|
||||
#include "ucl_types.h"
|
||||
|
||||
namespace ucl_hip {
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// - API Specific Types
|
||||
// --------------------------------------------------------------------------
|
||||
//typedef dim3 ucl_kernel_dim;
|
||||
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
typedef enum hipArray_Format {
|
||||
HIP_AD_FORMAT_UNSIGNED_INT8 = 0x01,
|
||||
HIP_AD_FORMAT_UNSIGNED_INT16 = 0x02,
|
||||
HIP_AD_FORMAT_UNSIGNED_INT32 = 0x03,
|
||||
HIP_AD_FORMAT_SIGNED_INT8 = 0x08,
|
||||
HIP_AD_FORMAT_SIGNED_INT16 = 0x09,
|
||||
HIP_AD_FORMAT_SIGNED_INT32 = 0x0a,
|
||||
HIP_AD_FORMAT_HALF = 0x10,
|
||||
HIP_AD_FORMAT_FLOAT = 0x20
|
||||
}hipArray_Format;
|
||||
#endif
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// - API SPECIFIC DEVICE POINTERS
|
||||
// --------------------------------------------------------------------------
|
||||
typedef hipDeviceptr_t device_ptr;
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// - HOST MEMORY ALLOCATION ROUTINES
|
||||
// --------------------------------------------------------------------------
|
||||
template <class mat_type, class copy_type>
|
||||
inline int _host_alloc(mat_type &mat, copy_type &cm, const size_t n,
|
||||
const enum UCL_MEMOPT kind, const enum UCL_MEMOPT kind2){
|
||||
hipError_t err=hipSuccess;
|
||||
if (kind==UCL_NOT_PINNED)
|
||||
*(mat.host_ptr())=(typename mat_type::data_type*)malloc(n);
|
||||
else if (kind==UCL_WRITE_ONLY)
|
||||
err=hipHostMalloc((void **)mat.host_ptr(),n,hipHostMallocWriteCombined);
|
||||
else
|
||||
err=hipHostMalloc((void **)mat.host_ptr(),n,hipHostMallocDefault);
|
||||
if (err!=hipSuccess || *(mat.host_ptr())==NULL)
|
||||
return UCL_MEMORY_ERROR;
|
||||
mat.cq()=cm.cq();
|
||||
return UCL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class mat_type>
|
||||
inline int _host_alloc(mat_type &mat, UCL_Device &dev, const size_t n,
|
||||
const enum UCL_MEMOPT kind, const enum UCL_MEMOPT kind2){
|
||||
hipError_t err=hipSuccess;
|
||||
if (kind==UCL_NOT_PINNED)
|
||||
*(mat.host_ptr())=(typename mat_type::data_type*)malloc(n);
|
||||
else if (kind==UCL_WRITE_ONLY)
|
||||
err=hipHostMalloc((void **)mat.host_ptr(),n,hipHostMallocWriteCombined);
|
||||
else
|
||||
err=hipHostMalloc((void **)mat.host_ptr(),n,hipHostMallocDefault);
|
||||
if (err!=hipSuccess || *(mat.host_ptr())==NULL)
|
||||
return UCL_MEMORY_ERROR;
|
||||
mat.cq()=dev.cq();
|
||||
return UCL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class mat_type>
|
||||
inline void _host_free(mat_type &mat) {
|
||||
if (mat.kind()==UCL_VIEW)
|
||||
return;
|
||||
else if (mat.kind()!=UCL_NOT_PINNED)
|
||||
CU_DESTRUCT_CALL(hipHostFree(mat.begin()));
|
||||
else
|
||||
free(mat.begin());
|
||||
}
|
||||
|
||||
template <class mat_type>
|
||||
inline int _host_resize(mat_type &mat, const size_t n) {
|
||||
_host_free(mat);
|
||||
hipError_t err=hipSuccess;
|
||||
if (mat.kind()==UCL_NOT_PINNED)
|
||||
*(mat.host_ptr())=(typename mat_type::data_type*)malloc(n);
|
||||
else if (mat.kind()==UCL_WRITE_ONLY)
|
||||
err=hipHostMalloc((void **)mat.host_ptr(),n,hipHostMallocWriteCombined);
|
||||
else
|
||||
err=hipHostMalloc((void **)mat.host_ptr(),n,hipHostMallocDefault);
|
||||
if (err!=hipSuccess || *(mat.host_ptr())==NULL)
|
||||
return UCL_MEMORY_ERROR;
|
||||
return UCL_SUCCESS;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// - DEVICE MEMORY ALLOCATION ROUTINES
|
||||
// --------------------------------------------------------------------------
|
||||
template <class mat_type, class copy_type>
|
||||
inline int _device_alloc(mat_type &mat, copy_type &cm, const size_t n,
|
||||
const enum UCL_MEMOPT kind) {
|
||||
hipError_t err=hipMalloc((void**)&mat.cbegin(),n);
|
||||
if (err!=hipSuccess)
|
||||
return UCL_MEMORY_ERROR;
|
||||
mat.cq()=cm.cq();
|
||||
return UCL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class mat_type>
|
||||
inline int _device_alloc(mat_type &mat, UCL_Device &dev, const size_t n,
|
||||
const enum UCL_MEMOPT kind) {
|
||||
hipError_t err=hipMalloc((void**)&mat.cbegin(),n);
|
||||
if (err!=hipSuccess)
|
||||
return UCL_MEMORY_ERROR;
|
||||
mat.cq()=dev.cq();
|
||||
return UCL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class mat_type, class copy_type>
|
||||
inline int _device_alloc(mat_type &mat, copy_type &cm, const size_t rows,
|
||||
const size_t cols, size_t &pitch,
|
||||
const enum UCL_MEMOPT kind) {
|
||||
hipError_t err;
|
||||
size_t upitch;
|
||||
err=hipMallocPitch((void**)&mat.cbegin(),&upitch,
|
||||
cols*sizeof(typename mat_type::data_type),rows);
|
||||
pitch=static_cast<size_t>(upitch);
|
||||
if (err!=hipSuccess)
|
||||
return UCL_MEMORY_ERROR;
|
||||
mat.cq()=cm.cq();
|
||||
return UCL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class mat_type, class copy_type>
|
||||
inline int _device_alloc(mat_type &mat, UCL_Device &d, const size_t rows,
|
||||
const size_t cols, size_t &pitch,
|
||||
const enum UCL_MEMOPT kind) {
|
||||
hipError_t err;
|
||||
size_t upitch;
|
||||
err=hipMallocPitch((void**)&mat.cbegin(),&upitch,
|
||||
cols*sizeof(typename mat_type::data_type),rows);
|
||||
pitch=static_cast<size_t>(upitch);
|
||||
if (err!=hipSuccess)
|
||||
return UCL_MEMORY_ERROR;
|
||||
mat.cq()=d.cq();
|
||||
return UCL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class mat_type>
|
||||
inline void _device_free(mat_type &mat) {
|
||||
if (mat.kind()!=UCL_VIEW){
|
||||
CU_DESTRUCT_CALL(hipFree((void*)mat.cbegin()));
|
||||
}
|
||||
}
|
||||
|
||||
template <class mat_type>
|
||||
inline int _device_resize(mat_type &mat, const size_t n) {
|
||||
_device_free(mat);
|
||||
hipError_t err=hipMalloc((void**)&mat.cbegin(),n);
|
||||
if (err!=hipSuccess)
|
||||
return UCL_MEMORY_ERROR;
|
||||
return UCL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class mat_type>
|
||||
inline int _device_resize(mat_type &mat, const size_t rows,
|
||||
const size_t cols, size_t &pitch) {
|
||||
_device_free(mat);
|
||||
hipError_t err;
|
||||
size_t upitch;
|
||||
err=hipMallocPitch((void**)&mat.cbegin(),&upitch,
|
||||
cols*sizeof(typename mat_type::data_type),rows);
|
||||
pitch=static_cast<size_t>(upitch);
|
||||
if (err!=hipSuccess)
|
||||
return UCL_MEMORY_ERROR;
|
||||
return UCL_SUCCESS;
|
||||
}
|
||||
|
||||
inline void _device_view(hipDeviceptr_t *ptr, hipDeviceptr_t &in) {
|
||||
*ptr=in;
|
||||
}
|
||||
|
||||
template <class numtyp>
|
||||
inline void _device_view(hipDeviceptr_t *ptr, numtyp *in) {
|
||||
*ptr=0;
|
||||
}
|
||||
|
||||
inline void _device_view(hipDeviceptr_t *ptr, hipDeviceptr_t &in,
|
||||
const size_t offset, const size_t numsize) {
|
||||
*ptr=(hipDeviceptr_t)(((char*)in)+offset*numsize);
|
||||
}
|
||||
|
||||
template <class numtyp>
|
||||
inline void _device_view(hipDeviceptr_t *ptr, numtyp *in,
|
||||
const size_t offset, const size_t numsize) {
|
||||
*ptr=0;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// - DEVICE IMAGE ALLOCATION ROUTINES
|
||||
// --------------------------------------------------------------------------
|
||||
template <class mat_type, class copy_type>
|
||||
inline void _device_image_alloc(mat_type &mat, copy_type &cm, const size_t rows,
|
||||
const size_t cols) {
|
||||
assert(0==1);
|
||||
}
|
||||
|
||||
template <class mat_type, class copy_type>
|
||||
inline void _device_image_alloc(mat_type &mat, UCL_Device &d, const size_t rows,
|
||||
const size_t cols) {
|
||||
assert(0==1);
|
||||
}
|
||||
|
||||
template <class mat_type>
|
||||
inline void _device_image_free(mat_type &mat) {
|
||||
assert(0==1);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// - ZERO ROUTINES
|
||||
// --------------------------------------------------------------------------
|
||||
inline void _host_zero(void *ptr, const size_t n) {
|
||||
memset(ptr,0,n);
|
||||
}
|
||||
|
||||
template <class mat_type>
|
||||
inline void _device_zero(mat_type &mat, const size_t n, command_queue &cq) {
|
||||
CU_SAFE_CALL(hipMemsetAsync((void*)mat.cbegin(),0,n,cq));
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// - MEMCPY ROUTINES
|
||||
// --------------------------------------------------------------------------
|
||||
|
||||
|
||||
template<class mat1, class mat2>
|
||||
hipMemcpyKind _memcpy_kind(mat1 &dst, const mat2 &src){
|
||||
assert(mat1::MEM_TYPE < 2 && mat2::MEM_TYPE < 2);
|
||||
return (hipMemcpyKind)((1 - mat2::MEM_TYPE)*2 + (1 - mat1::MEM_TYPE));
|
||||
}
|
||||
|
||||
template<class mat1, class mat2>
|
||||
inline void ucl_mv_cpy(mat1 &dst, const mat2 &src, const size_t n) {
|
||||
CU_SAFE_CALL(hipMemcpy((void*)dst.begin(), (void*)src.begin(), n, _memcpy_kind(dst, src)));
|
||||
}
|
||||
|
||||
template<class mat1, class mat2>
|
||||
inline void ucl_mv_cpy(mat1 &dst, const mat2 &src, const size_t n, hipStream_t &cq) {
|
||||
CU_SAFE_CALL(hipMemcpyAsync((void*)dst.begin(), (void*)src.begin(), n, _memcpy_kind(dst, src), cq));
|
||||
}
|
||||
|
||||
template<class mat1, class mat2>
|
||||
inline void ucl_mv_cpy(mat1 &dst, const size_t dpitch, const mat2 &src,
|
||||
const size_t spitch, const size_t cols,
|
||||
const size_t rows) {
|
||||
CU_SAFE_CALL(hipMemcpy2D((void*)dst.begin(), dpitch, (void*)src.begin(), spitch, cols, rows, _memcpy_kind(dst, src)));
|
||||
}
|
||||
|
||||
template<class mat1, class mat2>
|
||||
inline void ucl_mv_cpy(mat1 &dst, const size_t dpitch, const mat2 &src,
|
||||
const size_t spitch, const size_t cols,
|
||||
const size_t rows,hipStream_t &cq) {
|
||||
CU_SAFE_CALL(hipMemcpy2DAsync((void*)dst.begin(), dpitch, (void*)src.begin(), spitch, cols, rows, _memcpy_kind(dst, src), cq));
|
||||
}
|
||||
|
||||
} // namespace ucl_cudart
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,113 @@
|
|||
/* -----------------------------------------------------------------------
|
||||
Copyright (2010) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the Simplified BSD License.
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
#ifndef HIP_TEXTURE
|
||||
#define HIP_TEXTURE
|
||||
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "hip_kernel.h"
|
||||
#include "hip_mat.h"
|
||||
|
||||
namespace ucl_hip {
|
||||
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
inline hipError_t hipModuleGetTexRef(CUtexref* texRef, hipModule_t hmod, const char* name){
|
||||
return hipCUResultTohipError(cuModuleGetTexRef(texRef, hmod, name));
|
||||
}
|
||||
inline hipError_t hipTexRefSetFormat(CUtexref tex, hipArray_Format fmt, int NumPackedComponents) {
|
||||
return hipCUResultTohipError(cuTexRefSetFormat(tex, (CUarray_format)fmt, NumPackedComponents ));
|
||||
}
|
||||
inline hipError_t hipTexRefSetAddress(size_t* offset, CUtexref tex, hipDeviceptr_t devPtr, size_t size) {
|
||||
return hipCUResultTohipError(cuTexRefSetAddress(offset, tex, devPtr, size));
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Class storing a texture reference
|
||||
class UCL_Texture {
|
||||
public:
|
||||
UCL_Texture() {}
|
||||
~UCL_Texture() {}
|
||||
/// Construct with a specified texture reference
|
||||
inline UCL_Texture(UCL_Program &prog, const char *texture_name)
|
||||
{ get_texture(prog,texture_name); }
|
||||
/// Set the texture reference for this object
|
||||
inline void get_texture(UCL_Program &prog, const char *texture_name)
|
||||
{
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
CU_SAFE_CALL(hipModuleGetTexRef(&_tex, prog._module, texture_name));
|
||||
#else
|
||||
size_t _global_var_size;
|
||||
CU_SAFE_CALL(hipModuleGetGlobal(&_device_ptr_to_global_var, &_global_var_size, prog._module, texture_name));
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Bind a float array where each fetch grabs a vector of length numel
|
||||
template<class numtyp>
|
||||
inline void bind_float(UCL_D_Vec<numtyp> &vec, const unsigned numel)
|
||||
{ _bind_float(vec,numel); }
|
||||
|
||||
/// Bind a float array where each fetch grabs a vector of length numel
|
||||
template<class numtyp>
|
||||
inline void bind_float(UCL_D_Mat<numtyp> &vec, const unsigned numel)
|
||||
{ _bind_float(vec,numel); }
|
||||
|
||||
/// Bind a float array where each fetch grabs a vector of length numel
|
||||
template<class numtyp, class devtyp>
|
||||
inline void bind_float(UCL_Vector<numtyp, devtyp> &vec, const unsigned numel)
|
||||
{ _bind_float(vec.device,numel); }
|
||||
|
||||
/// Bind a float array where each fetch grabs a vector of length numel
|
||||
template<class numtyp, class devtyp>
|
||||
inline void bind_float(UCL_Matrix<numtyp, devtyp> &vec, const unsigned numel)
|
||||
{ _bind_float(vec.device,numel); }
|
||||
|
||||
/// Unbind the texture reference from the memory allocation
|
||||
inline void unbind() { }
|
||||
|
||||
/// Make a texture reference available to kernel
|
||||
inline void allow(UCL_Kernel &kernel) {
|
||||
//#if CUDA_VERSION < 4000
|
||||
//CU_SAFE_CALL(cuParamSetTexRef(kernel._kernel, CU_PARAM_TR_DEFAULT, _tex));
|
||||
//#endif
|
||||
}
|
||||
|
||||
private:
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
CUtexref _tex;
|
||||
#else
|
||||
void* _device_ptr_to_global_var;
|
||||
#endif
|
||||
friend class UCL_Kernel;
|
||||
|
||||
template<class mat_typ>
|
||||
inline void _bind_float(mat_typ &vec, const unsigned numel) {
|
||||
#ifdef UCL_DEBUG
|
||||
assert(numel!=0 && numel<5);
|
||||
#endif
|
||||
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
if (vec.element_size()==sizeof(float))
|
||||
CU_SAFE_CALL(hipTexRefSetFormat(_tex, HIP_AD_FORMAT_FLOAT, numel));
|
||||
else {
|
||||
if (numel>2)
|
||||
CU_SAFE_CALL(hipTexRefSetFormat(_tex, HIP_AD_FORMAT_SIGNED_INT32, numel));
|
||||
else
|
||||
CU_SAFE_CALL(hipTexRefSetFormat(_tex,HIP_AD_FORMAT_SIGNED_INT32,numel*2));
|
||||
}
|
||||
CU_SAFE_CALL(hipTexRefSetAddress(NULL, _tex, vec.cbegin(), vec.numel()*vec.element_size()));
|
||||
#else
|
||||
void* data_ptr = (void*)vec.cbegin();
|
||||
CU_SAFE_CALL(hipMemcpyHtoD(hipDeviceptr_t(_device_ptr_to_global_var), &data_ptr, sizeof(void*)));
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,107 @@
|
|||
/* -----------------------------------------------------------------------
|
||||
Copyright (2010) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the Simplified BSD License.
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
#ifndef HIP_TIMER_H
|
||||
#define HIP_TIMER_H
|
||||
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "hip_macros.h"
|
||||
#include "hip_device.h"
|
||||
|
||||
namespace ucl_hip {
|
||||
|
||||
/// Class for timing CUDA Driver events
|
||||
class UCL_Timer {
|
||||
public:
|
||||
inline UCL_Timer() : _total_time(0.0f), _initialized(false) { }
|
||||
inline UCL_Timer(UCL_Device &dev) : _total_time(0.0f), _initialized(false)
|
||||
{ init(dev); }
|
||||
|
||||
inline ~UCL_Timer() { clear(); }
|
||||
|
||||
/// Clear any data associated with timer
|
||||
/** \note init() must be called to reuse timer after a clear() **/
|
||||
inline void clear() {
|
||||
if (_initialized) {
|
||||
CU_DESTRUCT_CALL(hipEventDestroy(start_event));
|
||||
CU_DESTRUCT_CALL(hipEventDestroy(stop_event));
|
||||
_initialized=false;
|
||||
_total_time=0.0;
|
||||
}
|
||||
}
|
||||
|
||||
/// Initialize default command queue for timing
|
||||
inline void init(UCL_Device &dev) { init(dev, dev.cq()); }
|
||||
|
||||
/// Initialize command queue for timing
|
||||
inline void init(UCL_Device &dev, command_queue &cq) {
|
||||
clear();
|
||||
_cq=cq;
|
||||
_initialized=true;
|
||||
CU_SAFE_CALL( hipEventCreateWithFlags(&start_event,0) );
|
||||
CU_SAFE_CALL( hipEventCreateWithFlags(&stop_event,0) );
|
||||
}
|
||||
|
||||
/// Start timing on command queue
|
||||
inline void start() { CU_SAFE_CALL(hipEventRecord(start_event,_cq)); }
|
||||
|
||||
/// Stop timing on command queue
|
||||
inline void stop() { CU_SAFE_CALL(hipEventRecord(stop_event,_cq)); }
|
||||
|
||||
/// Block until the start event has been reached on device
|
||||
inline void sync_start()
|
||||
{ CU_SAFE_CALL(hipEventSynchronize(start_event)); }
|
||||
|
||||
/// Block until the stop event has been reached on device
|
||||
inline void sync_stop()
|
||||
{ CU_SAFE_CALL(hipEventSynchronize(stop_event)); }
|
||||
|
||||
/// Set the time elapsed to zero (not the total_time)
|
||||
inline void zero() {
|
||||
CU_SAFE_CALL(hipEventRecord(start_event,_cq));
|
||||
CU_SAFE_CALL(hipEventRecord(stop_event,_cq));
|
||||
}
|
||||
|
||||
/// Set the total time to zero
|
||||
inline void zero_total() { _total_time=0.0; }
|
||||
|
||||
/// Add time from previous start and stop to total
|
||||
/** Forces synchronization **/
|
||||
inline double add_to_total()
|
||||
{ double t=time(); _total_time+=t; return t/1000.0; }
|
||||
|
||||
/// Add a user specified time to the total (ms)
|
||||
inline void add_time_to_total(const double t) { _total_time+=t; }
|
||||
|
||||
/// Return the time (ms) of last start to stop - Forces synchronization
|
||||
inline double time() {
|
||||
float timer;
|
||||
CU_SAFE_CALL(hipEventSynchronize(stop_event));
|
||||
CU_SAFE_CALL( hipEventElapsedTime(&timer,start_event,stop_event) );
|
||||
return timer;
|
||||
}
|
||||
|
||||
/// Return the time (s) of last start to stop - Forces synchronization
|
||||
inline double seconds() { return time()/1000.0; }
|
||||
|
||||
/// Return the total time in ms
|
||||
inline double total_time() { return _total_time; }
|
||||
|
||||
/// Return the total time in seconds
|
||||
inline double total_seconds() { return _total_time/1000.0; }
|
||||
|
||||
private:
|
||||
hipEvent_t start_event, stop_event;
|
||||
hipStream_t _cq;
|
||||
double _total_time;
|
||||
bool _initialized;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif
|
|
@ -36,6 +36,11 @@ using namespace ucl_cudadr;
|
|||
using namespace ucl_cudart;
|
||||
#endif
|
||||
|
||||
#ifdef UCL_HIP
|
||||
#include "hip_device.h"
|
||||
using namespace ucl_hip;
|
||||
#endif
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
UCL_Device cop;
|
||||
std::cout << "Found " << cop.num_platforms() << " platform(s).\n";
|
||||
|
|
|
@ -179,13 +179,15 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
|
|||
if (_eflag) {
|
||||
for (int i=0; i<_inum; i++)
|
||||
evdwl+=engv[i];
|
||||
if (_ef_atom)
|
||||
if (_ilist==NULL)
|
||||
if (_ef_atom) {
|
||||
if (_ilist==NULL) {
|
||||
for (int i=0; i<_inum; i++)
|
||||
eatom[i]+=engv[i];
|
||||
else
|
||||
} else {
|
||||
for (int i=0; i<_inum; i++)
|
||||
eatom[_ilist[i]]+=engv[i];
|
||||
}
|
||||
}
|
||||
vstart=_inum;
|
||||
}
|
||||
if (_vflag) {
|
||||
|
@ -193,7 +195,7 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
|
|||
for (int j=0; j<6; j++) {
|
||||
for (int i=vstart; i<iend; i++)
|
||||
virial[j]+=engv[i];
|
||||
if (_vf_atom)
|
||||
if (_vf_atom){
|
||||
if (_ilist==NULL) {
|
||||
int ii=0;
|
||||
for (int i=vstart; i<iend; i++)
|
||||
|
@ -203,6 +205,7 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
|
|||
for (int i=vstart; i<iend; i++)
|
||||
vatom[_ilist[ii++]][j]+=engv[i];
|
||||
}
|
||||
}
|
||||
vstart+=_inum;
|
||||
iend+=_inum;
|
||||
}
|
||||
|
@ -228,7 +231,7 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
|
|||
evdwl+=engv[i];
|
||||
for (int i=_inum; i<iend; i++)
|
||||
ecoul+=engv[i];
|
||||
if (_ef_atom)
|
||||
if (_ef_atom) {
|
||||
if (_ilist==NULL) {
|
||||
for (int i=0; i<_inum; i++)
|
||||
eatom[i]+=engv[i];
|
||||
|
@ -240,6 +243,7 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
|
|||
for (int i=_inum, ii=0; i<iend; i++)
|
||||
eatom[_ilist[ii++]]+=engv[i];
|
||||
}
|
||||
}
|
||||
vstart=iend;
|
||||
iend+=_inum;
|
||||
}
|
||||
|
@ -247,7 +251,7 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
|
|||
for (int j=0; j<6; j++) {
|
||||
for (int i=vstart; i<iend; i++)
|
||||
virial[j]+=engv[i];
|
||||
if (_vf_atom)
|
||||
if (_vf_atom) {
|
||||
if (_ilist==NULL) {
|
||||
for (int i=vstart, ii=0; i<iend; i++)
|
||||
vatom[ii++][j]+=engv[i];
|
||||
|
@ -255,6 +259,7 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
|
|||
for (int i=vstart, ii=0; i<iend; i++)
|
||||
vatom[_ilist[ii++]][j]+=engv[i];
|
||||
}
|
||||
}
|
||||
vstart+=_inum;
|
||||
iend+=_inum;
|
||||
}
|
||||
|
|
|
@ -27,6 +27,10 @@ using namespace ucl_opencl;
|
|||
#include "geryon/nvc_timer.h"
|
||||
#include "geryon/nvc_mat.h"
|
||||
using namespace ucl_cudart;
|
||||
#elif defined(USE_HIP)
|
||||
#include "geryon/hip_timer.h"
|
||||
#include "geryon/hip_mat.h"
|
||||
using namespace ucl_hip;
|
||||
#else
|
||||
#include "geryon/nvd_timer.h"
|
||||
#include "geryon/nvd_mat.h"
|
||||
|
|
|
@ -15,6 +15,11 @@
|
|||
|
||||
#include "lal_atom.h"
|
||||
|
||||
#ifdef USE_HIP_DEVICE_SORT
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hipcub/hipcub.hpp>
|
||||
#endif
|
||||
|
||||
namespace LAMMPS_AL {
|
||||
#define AtomT Atom<numtyp,acctyp>
|
||||
|
||||
|
@ -70,6 +75,26 @@ bool AtomT::alloc(const int nall) {
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_HIP_DEVICE_SORT
|
||||
if (_gpu_nbor==1) {
|
||||
size_t temp_storage_bytes = 0;
|
||||
if(hipSuccess != hipcub::DeviceRadixSort::SortPairs(nullptr, temp_storage_bytes, sort_out_keys, sort_out_keys, sort_out_values, sort_out_values, _max_atoms))
|
||||
return false;
|
||||
if(sort_out_size < _max_atoms){
|
||||
if (sort_out_keys ) hipFree(sort_out_keys);
|
||||
if (sort_out_values) hipFree(sort_out_values);
|
||||
hipMalloc(&sort_out_keys , _max_atoms * sizeof(unsigned));
|
||||
hipMalloc(&sort_out_values, _max_atoms * sizeof(int ));
|
||||
sort_out_size = _max_atoms;
|
||||
}
|
||||
if(temp_storage_bytes > sort_temp_storage_size){
|
||||
if(sort_temp_storage) hipFree(sort_temp_storage);
|
||||
hipMalloc(&sort_temp_storage, temp_storage_bytes);
|
||||
sort_temp_storage_size = temp_storage_bytes;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// --------------------------- Device allocations
|
||||
int gpu_bytes=0;
|
||||
success=success && (x.alloc(_max_atoms*4,*dev,UCL_WRITE_ONLY,
|
||||
|
@ -184,6 +209,27 @@ bool AtomT::add_fields(const bool charge, const bool rot,
|
|||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_HIP_DEVICE_SORT
|
||||
if (_gpu_nbor==1) {
|
||||
size_t temp_storage_bytes = 0;
|
||||
if(hipSuccess != hipcub::DeviceRadixSort::SortPairs(nullptr, temp_storage_bytes, sort_out_keys, sort_out_keys, sort_out_values, sort_out_values, _max_atoms))
|
||||
return false;
|
||||
if(sort_out_size < _max_atoms){
|
||||
if (sort_out_keys ) hipFree(sort_out_keys);
|
||||
if (sort_out_values) hipFree(sort_out_values);
|
||||
hipMalloc(&sort_out_keys , _max_atoms * sizeof(unsigned));
|
||||
hipMalloc(&sort_out_values, _max_atoms * sizeof(int ));
|
||||
sort_out_size = _max_atoms;
|
||||
}
|
||||
if(temp_storage_bytes > sort_temp_storage_size){
|
||||
if(sort_temp_storage) hipFree(sort_temp_storage);
|
||||
hipMalloc(&sort_temp_storage, temp_storage_bytes);
|
||||
sort_temp_storage_size = temp_storage_bytes;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
success=success && (dev_particle_id.alloc(_max_atoms,*dev,
|
||||
UCL_READ_ONLY)==UCL_SUCCESS);
|
||||
gpu_bytes+=dev_particle_id.row_bytes();
|
||||
|
@ -275,6 +321,19 @@ void AtomT::clear_resize() {
|
|||
if (_gpu_nbor==1) cudppDestroyPlan(sort_plan);
|
||||
#endif
|
||||
|
||||
#ifdef USE_HIP_DEVICE_SORT
|
||||
if (_gpu_nbor==1) {
|
||||
if(sort_out_keys) hipFree(sort_out_keys);
|
||||
if(sort_out_values) hipFree(sort_out_values);
|
||||
if(sort_temp_storage) hipFree(sort_temp_storage);
|
||||
sort_out_keys = nullptr;
|
||||
sort_out_values = nullptr;
|
||||
sort_temp_storage = nullptr;
|
||||
sort_temp_storage_size = 0;
|
||||
sort_out_size = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (_gpu_nbor==2) {
|
||||
host_particle_id.clear();
|
||||
host_cell_id.clear();
|
||||
|
@ -326,6 +385,22 @@ void AtomT::sort_neighbor(const int num_atoms) {
|
|||
UCL_GERYON_EXIT;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_HIP_DEVICE_SORT
|
||||
if(sort_out_size < num_atoms){
|
||||
printf("AtomT::sort_neighbor: invalid temp buffer size\n");
|
||||
UCL_GERYON_EXIT;
|
||||
}
|
||||
if(hipSuccess != hipcub::DeviceRadixSort::SortPairs(sort_temp_storage, sort_temp_storage_size, (unsigned *)dev_cell_id.begin(), sort_out_keys, (int *)dev_particle_id.begin(), sort_out_values, num_atoms)){
|
||||
printf("AtomT::sort_neighbor: DeviceRadixSort error\n");
|
||||
UCL_GERYON_EXIT;
|
||||
}
|
||||
if(hipSuccess != hipMemcpy((unsigned *)dev_cell_id.begin(), sort_out_keys , num_atoms*sizeof(unsigned), hipMemcpyDeviceToDevice) ||
|
||||
hipSuccess != hipMemcpy((int *) dev_particle_id.begin(), sort_out_values, num_atoms*sizeof(int ), hipMemcpyDeviceToDevice)){
|
||||
printf("AtomT::sort_neighbor: copy output error\n");
|
||||
UCL_GERYON_EXIT;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef GPU_CAST
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_preprocessor.h"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -29,6 +29,11 @@ using namespace ucl_opencl;
|
|||
#include "geryon/nvc_mat.h"
|
||||
#include "geryon/nvc_kernel.h"
|
||||
using namespace ucl_cudart;
|
||||
#elif defined(USE_HIP)
|
||||
#include "geryon/hip_timer.h"
|
||||
#include "geryon/hip_mat.h"
|
||||
#include "geryon/hip_kernel.h"
|
||||
using namespace ucl_hip;
|
||||
#else
|
||||
#include "geryon/nvd_timer.h"
|
||||
#include "geryon/nvd_mat.h"
|
||||
|
@ -477,6 +482,14 @@ class Atom {
|
|||
CUDPPConfiguration sort_config;
|
||||
CUDPPHandle sort_plan;
|
||||
#endif
|
||||
|
||||
#ifdef USE_HIP_DEVICE_SORT
|
||||
unsigned* sort_out_keys = nullptr;
|
||||
int* sort_out_values = nullptr;
|
||||
void* sort_temp_storage = nullptr;
|
||||
size_t sort_temp_storage_size = 0;
|
||||
size_t sort_out_size = 0;
|
||||
#endif
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_preprocessor.h"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -24,6 +24,8 @@
|
|||
#include "geryon/ocl_texture.h"
|
||||
#elif defined(USE_CUDART)
|
||||
#include "geryon/nvc_texture.h"
|
||||
#elif defined(USE_HIP)
|
||||
#include "geryon/hip_texture.h"
|
||||
#else
|
||||
#include "geryon/nvd_texture.h"
|
||||
#endif
|
||||
|
|
|
@ -25,6 +25,8 @@
|
|||
#include "geryon/ocl_texture.h"
|
||||
#elif defined(USE_CUDART)
|
||||
#include "geryon/nvc_texture.h"
|
||||
#elif defined(USE_HIP)
|
||||
#include "geryon/hip_texture.h"
|
||||
#else
|
||||
#include "geryon/nvd_texture.h"
|
||||
#endif
|
||||
|
|
|
@ -23,6 +23,8 @@
|
|||
|
||||
#ifdef USE_OPENCL
|
||||
#include "geryon/ocl_texture.h"
|
||||
#elif defined(USE_HIP)
|
||||
#include "geryon/hip_texture.h"
|
||||
#else
|
||||
#include "geryon/nvd_texture.h"
|
||||
#endif
|
||||
|
|
|
@ -23,6 +23,8 @@
|
|||
|
||||
#ifdef USE_OPENCL
|
||||
#include "geryon/ocl_texture.h"
|
||||
#elif defined(USE_HIP)
|
||||
#include "geryon/hip_texture.h"
|
||||
#else
|
||||
#include "geryon/nvd_texture.h"
|
||||
#endif
|
||||
|
|
|
@ -24,6 +24,8 @@
|
|||
#include "geryon/ocl_texture.h"
|
||||
#elif defined(USE_CUDART)
|
||||
#include "geryon/nvc_texture.h"
|
||||
#elif defined(USE_HIP)
|
||||
#include "geryon/hip_texture.h"
|
||||
#else
|
||||
#include "geryon/nvd_texture.h"
|
||||
#endif
|
||||
|
|
|
@ -24,6 +24,8 @@
|
|||
#include "geryon/ocl_texture.h"
|
||||
#elif defined(USE_CUDART)
|
||||
#include "geryon/nvc_texture.h"
|
||||
#elif defined(USE_HIP)
|
||||
#include "geryon/hip_texture.h"
|
||||
#else
|
||||
#include "geryon/nvd_texture.h"
|
||||
#endif
|
||||
|
|
|
@ -13,12 +13,12 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
_texture( pos_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
#endif
|
||||
#else
|
||||
#define pos_tex x_
|
||||
|
|
|
@ -13,12 +13,12 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
_texture( pos_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
#endif
|
||||
#else
|
||||
#define pos_tex x_
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,15 +13,16 @@
|
|||
// email : ndactrung@gmail.com
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
// email : ndactrung@gmail.com
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,12 +13,12 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
_texture( pos_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
#endif
|
||||
#else
|
||||
#define pos_tex x_
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,12 +13,12 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
_texture( pos_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
#endif
|
||||
#else
|
||||
#define pos_tex x_
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
// email : ndtrung@umich.edu
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
// email : ndtrung@umich.edu
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
// email : a.kohlmeyer@temple.edu
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
// email : ndactrung@gmail.com
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -268,7 +268,7 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
|
|||
gpu_nbor=1;
|
||||
else if (_gpu_mode==Device<numtyp,acctyp>::GPU_HYB_NEIGH)
|
||||
gpu_nbor=2;
|
||||
#ifndef USE_CUDPP
|
||||
#if !defined(USE_CUDPP) && !defined(USE_HIP_DEVICE_SORT)
|
||||
if (gpu_nbor==1)
|
||||
gpu_nbor=2;
|
||||
#endif
|
||||
|
@ -341,7 +341,7 @@ int DeviceT::init_nbor(Neighbor *nbor, const int nlocal,
|
|||
gpu_nbor=1;
|
||||
else if (_gpu_mode==Device<numtyp,acctyp>::GPU_HYB_NEIGH)
|
||||
gpu_nbor=2;
|
||||
#ifndef USE_CUDPP
|
||||
#if !defined(USE_CUDPP) && !defined(USE_HIP_DEVICE_SORT)
|
||||
if (gpu_nbor==1)
|
||||
gpu_nbor=2;
|
||||
#endif
|
||||
|
@ -712,7 +712,7 @@ int DeviceT::compile_kernels() {
|
|||
gpu_lib_data.update_host(false);
|
||||
|
||||
_ptx_arch=static_cast<double>(gpu_lib_data[0])/100.0;
|
||||
#ifndef USE_OPENCL
|
||||
#if !(defined(USE_OPENCL) || defined(USE_HIP))
|
||||
if (_ptx_arch>gpu->arch() || floor(_ptx_arch)<floor(gpu->arch()))
|
||||
return -4;
|
||||
#endif
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_preprocessor.h"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -13,16 +13,16 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
texture<float4> mu_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
_texture( mu_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
texture<int4,1> mu_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
_texture_2d( mu_tex,int4);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,17 +13,17 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
texture<float4> mu_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
_texture( mu_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
texture<int4,1> mu_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
_texture_2d( mu_tex,int4);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,16 +13,16 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
texture<float4> mu_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
_texture( mu_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
texture<int4,1> mu_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
_texture_2d( mu_tex,int4);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,14 +13,14 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float4> vel_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( vel_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int4,1> vel_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture_2d( vel_tex,int4);
|
||||
#endif
|
||||
#else
|
||||
#define pos_tex x_
|
||||
|
|
|
@ -13,27 +13,27 @@
|
|||
// email : brownw@ornl.gov nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> fp_tex;
|
||||
texture<float4> rhor_sp1_tex;
|
||||
texture<float4> rhor_sp2_tex;
|
||||
texture<float4> frho_sp1_tex;
|
||||
texture<float4> frho_sp2_tex;
|
||||
texture<float4> z2r_sp1_tex;
|
||||
texture<float4> z2r_sp2_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( fp_tex,float);
|
||||
_texture( rhor_sp1_tex,float4);
|
||||
_texture( rhor_sp2_tex,float4);
|
||||
_texture( frho_sp1_tex,float4);
|
||||
_texture( frho_sp2_tex,float4);
|
||||
_texture( z2r_sp1_tex,float4);
|
||||
_texture( z2r_sp2_tex,float4);
|
||||
#else
|
||||
texture<int4> pos_tex;
|
||||
texture<int2> fp_tex;
|
||||
texture<int4> rhor_sp1_tex;
|
||||
texture<int4> rhor_sp2_tex;
|
||||
texture<int4> frho_sp1_tex;
|
||||
texture<int4> frho_sp2_tex;
|
||||
texture<int4> z2r_sp1_tex;
|
||||
texture<int4> z2r_sp2_tex;
|
||||
_texture( pos_tex,int4);
|
||||
_texture( fp_tex,int2);
|
||||
_texture( rhor_sp1_tex,int4);
|
||||
_texture( rhor_sp2_tex,int4);
|
||||
_texture( frho_sp1_tex,int4);
|
||||
_texture( frho_sp2_tex,int4);
|
||||
_texture( z2r_sp1_tex,int4);
|
||||
_texture( z2r_sp2_tex,int4);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -18,12 +18,14 @@
|
|||
|
||||
enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE};
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex, quat_tex;
|
||||
_texture( pos_tex, float4);
|
||||
_texture( quat_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex, quat_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture_2d( quat_tex,int4);
|
||||
#endif
|
||||
#else
|
||||
#define pos_tex x_
|
||||
|
|
|
@ -13,12 +13,12 @@
|
|||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_preprocessor.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
_texture( pos_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
#endif
|
||||
#else
|
||||
#define pos_tex x_
|
||||
|
|
|
@ -13,12 +13,12 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
_texture( pos_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
#endif
|
||||
#else
|
||||
#define pos_tex x_
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_ellipsoid_extra.h"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_ellipsoid_extra.h"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -13,12 +13,12 @@
|
|||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
_texture( pos_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
#endif
|
||||
#else
|
||||
#define pos_tex x_
|
||||
|
|
|
@ -13,12 +13,12 @@
|
|||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
_texture( pos_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
#endif
|
||||
#else
|
||||
#define pos_tex x_
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,19 +13,19 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
texture<float> gcons_tex;
|
||||
texture<float> dgcons_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
_texture( gcons_tex,float);
|
||||
_texture( dgcons_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
texture<int2> gcons_tex;
|
||||
texture<int2> dgcons_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
_texture( gcons_tex,int2);
|
||||
_texture( dgcons_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,12 +13,12 @@
|
|||
// email : ndactrung@gmail.com
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
_texture( pos_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
#endif
|
||||
#else
|
||||
#define pos_tex x_
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,13 +13,13 @@
|
|||
// email : ibains@nvidia.com
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
_texture( pos_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
// email : ndactrung@gmail.com
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,13 +13,13 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
_texture( pos_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,12 +13,12 @@
|
|||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
_texture( pos_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
#endif
|
||||
#else
|
||||
#define pos_tex x_
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -23,7 +23,7 @@ const char *lj_tip4p=0;
|
|||
|
||||
#include "lal_lj_tip4p_long.h"
|
||||
#include <cassert>
|
||||
using namespace LAMMPS_AL;
|
||||
namespace LAMMPS_AL {
|
||||
#define LJTIP4PLongT LJ_TIP4PLong<numtyp, acctyp>
|
||||
|
||||
extern Device<PRECISION,ACC_PRECISION> device;
|
||||
|
@ -370,6 +370,5 @@ int** LJTIP4PLongT::compute(const int ago, const int inum_full,
|
|||
}
|
||||
|
||||
|
||||
|
||||
|
||||
template class LJ_TIP4PLong<PRECISION,ACC_PRECISION>;
|
||||
}
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
// email : thevsevak@gmail.com
|
||||
// ***************************************************************************
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifdef LAMMPS_SMALLBIG
|
||||
|
@ -27,11 +27,11 @@
|
|||
#define tagint int
|
||||
#endif
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,12 +13,12 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
_texture( pos_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
#endif
|
||||
#else
|
||||
#define pos_tex x_
|
||||
|
|
|
@ -13,13 +13,13 @@
|
|||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
_texture( pos_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_preprocessor.h"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
// email : penwang@nvidia.com, brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_preprocessor.h"
|
||||
#ifdef LAMMPS_SMALLBIG
|
||||
#define tagint int
|
||||
|
@ -27,9 +27,9 @@
|
|||
#define tagint int
|
||||
#endif
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
_texture( pos_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
#endif
|
||||
|
||||
__kernel void calc_cell_id(const numtyp4 *restrict pos,
|
||||
|
|
|
@ -24,6 +24,10 @@ using namespace ucl_opencl;
|
|||
#include "geryon/nvc_kernel.h"
|
||||
#include "geryon/nvc_texture.h"
|
||||
using namespace ucl_cudart;
|
||||
#elif defined(USE_HIP)
|
||||
#include "geryon/hip_kernel.h"
|
||||
#include "geryon/hip_texture.h"
|
||||
using namespace ucl_hip;
|
||||
#else
|
||||
#include "geryon/nvd_kernel.h"
|
||||
#include "geryon/nvd_texture.h"
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_preprocessor.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( q_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( q_tex,int2);
|
||||
#endif
|
||||
|
||||
// Allow PPPM to compile without atomics for NVIDIA 1.0 cards, error
|
||||
|
|
|
@ -23,6 +23,8 @@
|
|||
#include "geryon/ocl_texture.h"
|
||||
#elif defined(USE_CUDART)
|
||||
#include "geryon/nvc_texture.h"
|
||||
#elif defined(USE_HIP)
|
||||
#include "geryon/hip_texture.h"
|
||||
#else
|
||||
#include "geryon/nvd_texture.h"
|
||||
#endif
|
||||
|
|
|
@ -24,9 +24,11 @@ struct _lgpu_int2 {
|
|||
int x; int y;
|
||||
};
|
||||
|
||||
#ifndef USE_HIP
|
||||
#ifndef int2
|
||||
#define int2 _lgpu_int2
|
||||
#endif
|
||||
#endif
|
||||
|
||||
struct _lgpu_float2 {
|
||||
float x; float y;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// **************************************************************************
|
||||
// **************************************************************************
|
||||
// preprocessor.cu
|
||||
// -------------------
|
||||
// W. Michael Brown (ORNL)
|
||||
|
@ -60,6 +60,150 @@
|
|||
//
|
||||
//*************************************************************************/
|
||||
|
||||
#define _texture(name, type) texture<type> name
|
||||
#define _texture_2d(name, type) texture<type,1> name
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// HIP DEFINITIONS
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
#ifdef USE_HIP
|
||||
#include <hip/hip_runtime.h>
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
#define mul24(x, y) __mul24(x, y)
|
||||
#undef _texture
|
||||
#undef _texture_2d
|
||||
#define _texture(name, type) __device__ type* name
|
||||
#define _texture_2d(name, type) __device__ type* name
|
||||
#endif
|
||||
#define GLOBAL_ID_X threadIdx.x+mul24(blockIdx.x,blockDim.x)
|
||||
#define GLOBAL_ID_Y threadIdx.y+mul24(blockIdx.y,blockDim.y)
|
||||
#define GLOBAL_SIZE_X mul24(gridDim.x,blockDim.x);
|
||||
#define GLOBAL_SIZE_Y mul24(gridDim.y,blockDim.y);
|
||||
#define THREAD_ID_X threadIdx.x
|
||||
#define THREAD_ID_Y threadIdx.y
|
||||
#define BLOCK_ID_X blockIdx.x
|
||||
#define BLOCK_ID_Y blockIdx.y
|
||||
#define BLOCK_SIZE_X blockDim.x
|
||||
#define BLOCK_SIZE_Y blockDim.y
|
||||
#define __kernel extern "C" __global__
|
||||
#ifdef __local
|
||||
#undef __local
|
||||
#endif
|
||||
#define __local __shared__
|
||||
#define __global
|
||||
#define restrict __restrict__
|
||||
#define atom_add atomicAdd
|
||||
#define ucl_inline static __inline__ __device__
|
||||
|
||||
#define THREADS_PER_ATOM 4
|
||||
#define THREADS_PER_CHARGE 8
|
||||
#define BLOCK_NBOR_BUILD 128
|
||||
#define BLOCK_PAIR 256
|
||||
#define BLOCK_BIO_PAIR 256
|
||||
#define BLOCK_ELLIPSE 128
|
||||
#define MAX_SHARED_TYPES 11
|
||||
|
||||
#ifdef _SINGLE_SINGLE
|
||||
ucl_inline double shfl_xor(double var, int laneMask, int width) {
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
return __shfl_xor(var, laneMask, width);
|
||||
#else
|
||||
return __shfl_xor_sync(0xffffffff, var, laneMask, width);
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
ucl_inline double shfl_xor(double var, int laneMask, int width) {
|
||||
int2 tmp;
|
||||
tmp.x = __double2hiint(var);
|
||||
tmp.y = __double2loint(var);
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
tmp.x = __shfl_xor(tmp.x,laneMask,width);
|
||||
tmp.y = __shfl_xor(tmp.y,laneMask,width);
|
||||
#else
|
||||
tmp.x = __shfl_xor_sync(0xffffffff, tmp.x,laneMask,width);
|
||||
tmp.y = __shfl_xor_sync(0xffffffff, tmp.y,laneMask,width);
|
||||
#endif
|
||||
return __hiloint2double(tmp.x,tmp.y);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
#define ARCH 600
|
||||
#define WARP_SIZE 64
|
||||
#endif
|
||||
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
#define ARCH __CUDA_ARCH__
|
||||
#define WARP_SIZE 32
|
||||
#endif
|
||||
|
||||
#define fast_mul(X,Y) (X)*(Y)
|
||||
|
||||
#define MEM_THREADS WARP_SIZE
|
||||
#define PPPM_BLOCK_1D 64
|
||||
#define BLOCK_CELL_2D 8
|
||||
#define BLOCK_CELL_ID 128
|
||||
#define MAX_BIO_SHARED_TYPES 128
|
||||
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define fetch4(ans,i,pos_tex) { \
|
||||
int4 xy = tex1Dfetch(pos_tex,i*2); \
|
||||
int4 zt = tex1Dfetch(pos_tex,i*2+1); \
|
||||
ans.x=__hiloint2double(xy.y, xy.x); \
|
||||
ans.y=__hiloint2double(xy.w, xy.z); \
|
||||
ans.z=__hiloint2double(zt.y, zt.x); \
|
||||
ans.w=__hiloint2double(zt.w, zt.z); \
|
||||
}
|
||||
#define fetch(ans,i,q_tex) { \
|
||||
int2 qt = tex1Dfetch(q_tex,i); \
|
||||
ans=__hiloint2double(qt.y, qt.x); \
|
||||
}
|
||||
#else
|
||||
#define fetch4(ans,i,pos_tex) ans=tex1Dfetch(pos_tex, i);
|
||||
#define fetch(ans,i,q_tex) ans=tex1Dfetch(q_tex,i);
|
||||
#endif
|
||||
#else
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define fetch4(ans,i,pos_tex) (ans=*(((double4*)pos_tex) + i))
|
||||
#define fetch(ans,i,q_tex) (ans=*(((double *) q_tex) + i))
|
||||
#else
|
||||
#define fetch4(ans,i,pos_tex) (ans=*(((float4*)pos_tex) + i))
|
||||
#define fetch(ans,i,q_tex) (ans=*(((float *) q_tex) + i))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define ucl_exp exp
|
||||
#define ucl_powr pow
|
||||
#define ucl_atan atan
|
||||
#define ucl_cbrt cbrt
|
||||
#define ucl_ceil ceil
|
||||
#define ucl_abs fabs
|
||||
#define ucl_rsqrt rsqrt
|
||||
#define ucl_sqrt sqrt
|
||||
#define ucl_recip(x) ((numtyp)1.0/(x))
|
||||
|
||||
#else
|
||||
#define ucl_atan atanf
|
||||
#define ucl_cbrt cbrtf
|
||||
#define ucl_ceil ceilf
|
||||
#define ucl_abs fabsf
|
||||
#define ucl_recip(x) ((numtyp)1.0/(x))
|
||||
#define ucl_rsqrt rsqrtf
|
||||
#define ucl_sqrt sqrtf
|
||||
|
||||
#ifdef NO_HARDWARE_TRANSCENDENTALS
|
||||
#define ucl_exp expf
|
||||
#define ucl_powr powf
|
||||
#else
|
||||
#define ucl_exp __expf
|
||||
#define ucl_powr __powf
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// CUDA DEFINITIONS
|
||||
// -------------------------------------------------------------------------
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_ellipsoid_extra.h"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_ellipsoid_extra.h"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -13,12 +13,12 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
_texture( pos_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
#endif
|
||||
#else
|
||||
#define pos_tex x_
|
||||
|
|
|
@ -13,19 +13,19 @@
|
|||
// email : brownw@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float4> sw1_tex;
|
||||
texture<float4> sw2_tex;
|
||||
texture<float4> sw3_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( sw1_tex,float4);
|
||||
_texture( sw2_tex,float4);
|
||||
_texture( sw3_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int4> sw1_tex;
|
||||
texture<int4> sw2_tex;
|
||||
texture<int4> sw3_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( sw1_tex,int4);
|
||||
_texture( sw2_tex,int4);
|
||||
_texture( sw3_tex,int4);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,12 +13,12 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
_texture( pos_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
#endif
|
||||
#else
|
||||
#define pos_tex x_
|
||||
|
|
|
@ -13,23 +13,23 @@
|
|||
// email : ndactrung@gmail.com
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_tersoff_extra.h"
|
||||
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float4> ts1_tex;
|
||||
texture<float4> ts2_tex;
|
||||
texture<float4> ts3_tex;
|
||||
texture<float4> ts4_tex;
|
||||
texture<float4> ts5_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( ts1_tex,float4);
|
||||
_texture( ts2_tex,float4);
|
||||
_texture( ts3_tex,float4);
|
||||
_texture( ts4_tex,float4);
|
||||
_texture( ts5_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int4> ts1_tex;
|
||||
texture<int4> ts2_tex;
|
||||
texture<int4> ts3_tex;
|
||||
texture<int4> ts4_tex;
|
||||
texture<int4> ts5_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( ts1_tex,int4);
|
||||
_texture( ts2_tex,int4);
|
||||
_texture( ts3_tex,int4);
|
||||
_texture( ts4_tex,int4);
|
||||
_texture( ts5_tex,int4);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
#ifndef LAL_TERSOFF_EXTRA_H
|
||||
#define LAL_TERSOFF_EXTRA_H
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#else
|
||||
#endif
|
||||
|
|
|
@ -13,23 +13,23 @@
|
|||
// email : ndactrung@gmail.com
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_tersoff_mod_extra.h"
|
||||
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float4> ts1_tex;
|
||||
texture<float4> ts2_tex;
|
||||
texture<float4> ts3_tex;
|
||||
texture<float4> ts4_tex;
|
||||
texture<float4> ts5_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( ts1_tex,float4);
|
||||
_texture( ts2_tex,float4);
|
||||
_texture( ts3_tex,float4);
|
||||
_texture( ts4_tex,float4);
|
||||
_texture( ts5_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int4> ts1_tex;
|
||||
texture<int4> ts2_tex;
|
||||
texture<int4> ts3_tex;
|
||||
texture<int4> ts4_tex;
|
||||
texture<int4> ts5_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( ts1_tex,int4);
|
||||
_texture( ts2_tex,int4);
|
||||
_texture( ts3_tex,int4);
|
||||
_texture( ts4_tex,int4);
|
||||
_texture( ts5_tex,int4);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
#ifndef LAL_TERSOFF_MOD_EXTRA_H
|
||||
#define LAL_TERSOFF_MOD_EXTRA_H
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#else
|
||||
#endif
|
||||
|
|
|
@ -13,25 +13,25 @@
|
|||
// email : ndactrung@gmail.com
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_tersoff_zbl_extra.h"
|
||||
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float4> ts1_tex;
|
||||
texture<float4> ts2_tex;
|
||||
texture<float4> ts3_tex;
|
||||
texture<float4> ts4_tex;
|
||||
texture<float4> ts5_tex;
|
||||
texture<float4> ts6_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( ts1_tex,float4);
|
||||
_texture( ts2_tex,float4);
|
||||
_texture( ts3_tex,float4);
|
||||
_texture( ts4_tex,float4);
|
||||
_texture( ts5_tex,float4);
|
||||
_texture( ts6_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int4> ts1_tex;
|
||||
texture<int4> ts2_tex;
|
||||
texture<int4> ts3_tex;
|
||||
texture<int4> ts4_tex;
|
||||
texture<int4> ts5_tex;
|
||||
texture<int4> ts6_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( ts1_tex,int4);
|
||||
_texture( ts2_tex,int4);
|
||||
_texture( ts3_tex,int4);
|
||||
_texture( ts4_tex,int4);
|
||||
_texture( ts5_tex,int4);
|
||||
_texture( ts6_tex,int4);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
#ifndef LAL_TERSOFF_ZBL_EXTRA_H
|
||||
#define LAL_TERSOFF_ZBL_EXTRA_H
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#else
|
||||
#endif
|
||||
|
|
|
@ -15,12 +15,12 @@
|
|||
dekoning@ifi.unicamp.br
|
||||
***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
_texture( pos_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
#endif
|
||||
#else
|
||||
#define pos_tex x_
|
||||
|
|
|
@ -13,23 +13,23 @@
|
|||
// email : andershaf@gmail.com
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float4> param1_tex;
|
||||
texture<float4> param2_tex;
|
||||
texture<float4> param3_tex;
|
||||
texture<float4> param4_tex;
|
||||
texture<float4> param5_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( param1_tex,float4);
|
||||
_texture( param2_tex,float4);
|
||||
_texture( param3_tex,float4);
|
||||
_texture( param4_tex,float4);
|
||||
_texture( param5_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int4> param1_tex;
|
||||
texture<int4> param2_tex;
|
||||
texture<int4> param3_tex;
|
||||
texture<int4> param4_tex;
|
||||
texture<int4> param5_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( param1_tex,int4);
|
||||
_texture( param2_tex,int4);
|
||||
_texture( param3_tex,int4);
|
||||
_texture( param4_tex,int4);
|
||||
_texture( param5_tex,int4);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,12 +13,12 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
_texture( pos_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
#endif
|
||||
#else
|
||||
#define pos_tex x_
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> rad_tex;
|
||||
_texture( pos_tex,float4);
|
||||
_texture( rad_tex,float);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> rad_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
_texture( rad_tex,int2);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
|
@ -13,12 +13,12 @@
|
|||
// email : ndactrung@gmail.com
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
_texture( pos_tex,float4);
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
_texture_2d( pos_tex,int4);
|
||||
#endif
|
||||
#else
|
||||
#define pos_tex x_
|
||||
|
|
|
@ -0,0 +1,120 @@
|
|||
# hip = MPI with HIP(clang)
|
||||
|
||||
SHELL = /bin/sh
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# compiler/linker settings
|
||||
# specify flags and libraries needed for your compiler
|
||||
|
||||
CC = mpicxx
|
||||
CCFLAGS = -g -O3
|
||||
SHFLAGS = -fPIC
|
||||
DEPFLAGS = -M
|
||||
|
||||
HIP_PATH ?= $(wildcard /opt/rocm/hip)
|
||||
LINK = $(HIP_PATH)/bin/hipcc
|
||||
LINKFLAGS = -g -O3 $(shell mpicxx --showme:link)
|
||||
LIB =
|
||||
SIZE = size
|
||||
|
||||
ARCHIVE = ar
|
||||
ARFLAGS = -rc
|
||||
SHLIBFLAGS = -shared
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# LAMMPS-specific settings, all OPTIONAL
|
||||
# specify settings for LAMMPS features you will use
|
||||
# if you change any -D setting, do full re-compile after "make clean"
|
||||
|
||||
# LAMMPS ifdef settings
|
||||
# see possible settings in Section 2.2 (step 4) of manual
|
||||
|
||||
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_MEMALIGN=64
|
||||
|
||||
# MPI library
|
||||
# see discussion in Section 2.2 (step 5) of manual
|
||||
# MPI wrapper compiler/linker can provide this info
|
||||
# can point to dummy MPI library in src/STUBS as in Makefile.serial
|
||||
# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
|
||||
# INC = path for mpi.h, MPI compiler settings
|
||||
# PATH = path for MPI library
|
||||
# LIB = name of MPI library
|
||||
|
||||
MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1
|
||||
MPI_PATH =
|
||||
MPI_LIB =
|
||||
|
||||
# FFT library
|
||||
# see discussion in Section 2.2 (step 6) of manual
|
||||
# can be left blank to use provided KISS FFT library
|
||||
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
|
||||
# PATH = path for FFT library
|
||||
# LIB = name of FFT library
|
||||
|
||||
FFT_INC =
|
||||
FFT_PATH =
|
||||
FFT_LIB =
|
||||
|
||||
# JPEG and/or PNG library
|
||||
# see discussion in Section 2.2 (step 7) of manual
|
||||
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
|
||||
# INC = path(s) for jpeglib.h and/or png.h
|
||||
# PATH = path(s) for JPEG library and/or PNG library
|
||||
# LIB = name(s) of JPEG library and/or PNG library
|
||||
|
||||
JPG_INC =
|
||||
JPG_PATH =
|
||||
JPG_LIB =
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# build rules and dependencies
|
||||
# do not edit this section
|
||||
|
||||
include Makefile.package.settings
|
||||
include Makefile.package
|
||||
|
||||
ifeq (nvcc,${HIP_PLATFORM})
|
||||
# fix nvcc can't handle -pthread flag
|
||||
LINKFLAGS := $(subst -pthread,-Xcompiler -pthread,$(LINKFLAGS))
|
||||
endif
|
||||
|
||||
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
|
||||
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
|
||||
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
|
||||
EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS)
|
||||
EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS)
|
||||
|
||||
# Path to src files
|
||||
|
||||
vpath %.cpp ..
|
||||
vpath %.h ..
|
||||
|
||||
# Link target
|
||||
|
||||
$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS)
|
||||
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
|
||||
$(SIZE) $(EXE)
|
||||
|
||||
# Library targets
|
||||
|
||||
lib: $(OBJ) $(EXTRA_LINK_DEPENDS)
|
||||
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
|
||||
|
||||
shlib: $(OBJ) $(EXTRA_LINK_DEPENDS)
|
||||
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
|
||||
$(OBJ) $(EXTRA_LIB) $(LIB)
|
||||
|
||||
# Compilation rules
|
||||
|
||||
%.o:%.cpp
|
||||
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
# Individual dependencies
|
||||
|
||||
depend : fastdep.exe $(SRC)
|
||||
@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
|
||||
|
||||
fastdep.exe: ../DEPEND/fastdep.c
|
||||
cc -O -o $@ $<
|
||||
|
||||
sinclude .depend
|
Loading…
Reference in New Issue