Merge 'gpu_hip_port' into master

This commit is contained in:
Vsevak 2020-01-28 20:09:40 +03:00
parent 5eef3b1828
commit 66c5fa2abd
91 changed files with 2290 additions and 312 deletions

148
lib/gpu/Makefile.hip Normal file
View File

@ -0,0 +1,148 @@
# /* ----------------------------------------------------------------------
# Generic Linux Makefile for HIP
# - export HIP_PLATFORM=hcc (or nvcc) before execution
# - change HIP_ARCH for your GPU
# ------------------------------------------------------------------------- */
# this setting should match LAMMPS Makefile
# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
LMP_INC = -DLAMMPS_SMALLBIG
# precision for GPU calculations
# -D_SINGLE_SINGLE # Single precision for all calculations
# -D_DOUBLE_DOUBLE # Double precision for all calculations
# -D_SINGLE_DOUBLE # Accumulation of forces, etc. in double
HIP_PRECISION = -D_SINGLE_DOUBLE
HIP_OPTS = -O3
HIP_HOST_OPTS = -Wno-deprecated-declarations
HIP_HOST_INCLUDE =
# use device sort
# requires linking with hipcc and hipCUB + (rocPRIM or CUB for AMD or Nvidia respectively)
HIP_HOST_OPTS += -DUSE_HIP_DEVICE_SORT
# path to cub
HIP_HOST_INCLUDE += -I./
# path to hipcub
HIP_HOST_INCLUDE += -I$(HIP_PATH)/../include
# use mpi
HIP_HOST_OPTS += -DMPI_GERYON -DUCL_NO_EXIT
# this settings should match LAMMPS Makefile
MPI_COMP_OPTS = $(shell mpicxx --showme:compile)
MPI_LINK_OPTS = $(shell mpicxx --showme:link)
#MPI_COMP_OPTS += -I/usr/include/mpi -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1
HIP_PATH ?= $(wildcard /opt/rocm/hip)
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
ifeq (hcc,$(HIP_PLATFORM))
HIP_OPTS += -ffast-math
# possible values: gfx803,gfx900,gfx906
HIP_ARCH = gfx906
else ifeq (nvcc,$(HIP_PLATFORM))
HIP_OPTS += --use_fast_math
HIP_ARCH = -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_32,code=[sm_32,compute_32] -gencode arch=compute_35,code=[sm_35,compute_35] \
-gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52] -gencode arch=compute_53,code=[sm_53,compute_53]\
-gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] -gencode arch=compute_62,code=[sm_62,compute_62]\
-gencode arch=compute_70,code=[sm_70,compute_70] -gencode arch=compute_72,code=[sm_72,compute_72] -gencode arch=compute_75,code=[sm_75,compute_75]
else
$(error Specify HIP platform using 'export HIP_PLATFORM=(hcc,nvcc)')
endif
BIN_DIR = .
OBJ_DIR = ./obj
LIB_DIR = .
AR = ar
BSH = /bin/sh
# /* ----------------------------------------------------------------------
# don't change section below without need
# ------------------------------------------------------------------------- */
HIP_OPTS += -DUSE_HIP $(HIP_PRECISION)
HIP_GPU_OPTS += $(HIP_OPTS) -I./
ifeq (hcc,$(HIP_PLATFORM))
HIP_HOST_OPTS += -fPIC
HIP_GPU_CC = $(HIP_PATH)/bin/hipcc --genco
HIP_GPU_OPTS_S = -t="$(HIP_ARCH)" -f=\"
HIP_GPU_OPTS_E = \"
HIP_KERNEL_SUFFIX = .cpp
HIP_LIBS_TARGET = export HCC_AMDGPU_TARGET := $(HIP_ARCH)
export HCC_AMDGPU_TARGET := $(HIP_ARCH)
else ifeq (nvcc,$(HIP_PLATFORM))
HIP_GPU_CC = $(HIP_PATH)/bin/hipcc --fatbin
HIP_GPU_OPTS += $(HIP_ARCH)
HIP_GPU_SORT_ARCH = $(HIP_ARCH)
# fix nvcc can't handle -pthread flag
MPI_COMP_OPTS := $(subst -pthread,-Xcompiler -pthread,$(MPI_COMP_OPTS))
MPI_LINK_OPTS := $(subst -pthread,-Xcompiler -pthread,$(MPI_LINK_OPTS))
endif
# hipcc is essential for device sort, because of hipcub is header only library and ROCm gpu code generation is deferred to the linking stage
HIP_HOST_CC = $(HIP_PATH)/bin/hipcc
HIP_HOST_OPTS += $(HIP_OPTS) $(MPI_COMP_OPTS) $(LMP_INC)
HIP_HOST_CC_CMD = $(HIP_HOST_CC) $(HIP_HOST_OPTS) $(HIP_HOST_INCLUDE)
# sources
ALL_H = $(wildcard ./geryon/ucl*.h) $(wildcard ./geryon/hip*.h) $(wildcard ./lal_*.h)
SRCS := $(wildcard ./lal_*.cpp)
OBJS := $(subst ./,$(OBJ_DIR)/,$(SRCS:%.cpp=%.o))
CUS := $(wildcard lal_*.cu)
CUHS := $(filter-out pppm_cubin.h, $(CUS:lal_%.cu=%_cubin.h)) pppm_f_cubin.h pppm_d_cubin.h
CUHS := $(addprefix $(OBJ_DIR)/, $(CUHS))
all: $(OBJ_DIR) $(CUHS) $(LIB_DIR)/libgpu.a $(BIN_DIR)/hip_get_devices
$(OBJ_DIR):
mkdir -p $@
# GPU kernels compilation
$(OBJ_DIR)/pppm_f_cubin.h: lal_pppm.cu $(ALL_H)
@cp $< $(OBJ_DIR)/temp_pppm_f.cu$(HIP_KERNEL_SUFFIX)
$(HIP_GPU_CC) $(HIP_GPU_OPTS_S) $(HIP_GPU_OPTS) -Dgrdtyp=float -Dgrdtyp4=float4 $(HIP_GPU_OPTS_E) -o $(OBJ_DIR)/pppm_f.cubin $(OBJ_DIR)/temp_pppm_f.cu$(HIP_KERNEL_SUFFIX)
@xxd -i $(OBJ_DIR)/pppm_f.cubin $@
@sed -i "s/[a-zA-Z0-9_]*pppm_f_cubin/pppm_f/g" $@
@rm $(OBJ_DIR)/temp_pppm_f.cu$(HIP_KERNEL_SUFFIX) $(OBJ_DIR)/pppm_f.cubin
$(OBJ_DIR)/pppm_d_cubin.h: lal_pppm.cu $(ALL_H)
@cp $< $(OBJ_DIR)/temp_pppm_d.cu$(HIP_KERNEL_SUFFIX)
$(HIP_GPU_CC) $(HIP_GPU_OPTS_S) $(HIP_GPU_OPTS) -Dgrdtyp=double -Dgrdtyp4=double4 $(HIP_GPU_OPTS_E) -o $(OBJ_DIR)/pppm_d.cubin $(OBJ_DIR)/temp_pppm_d.cu$(HIP_KERNEL_SUFFIX)
@xxd -i $(OBJ_DIR)/pppm_d.cubin $@
@sed -i "s/[a-zA-Z0-9_]*pppm_d_cubin/pppm_d/g" $@
@rm $(OBJ_DIR)/temp_pppm_d.cu$(HIP_KERNEL_SUFFIX) $(OBJ_DIR)/pppm_d.cubin
$(OBJ_DIR)/%_cubin.h: lal_%.cu $(ALL_H)
@cp $< $(OBJ_DIR)/temp_$*.cu$(HIP_KERNEL_SUFFIX)
$(HIP_GPU_CC) $(HIP_GPU_OPTS_S) $(HIP_GPU_OPTS) $(HIP_GPU_OPTS_E) -o $(OBJ_DIR)/$*.cubin $(OBJ_DIR)/temp_$*.cu$(HIP_KERNEL_SUFFIX)
@xxd -i $(OBJ_DIR)/$*.cubin $@
@sed -i "s/[a-zA-Z0-9_]*$*_cubin/$*/g" $@
@rm $(OBJ_DIR)/temp_$*.cu$(HIP_KERNEL_SUFFIX) $(OBJ_DIR)/$*.cubin
# host sources compilation
$(OBJ_DIR)/lal_atom.o: lal_atom.cpp $(CUHS) $(ALL_H)
$(HIP_HOST_CC_CMD) -o $@ -c $< -I$(OBJ_DIR) $(HIP_GPU_SORT_ARCH)
$(OBJ_DIR)/lal_%.o: lal_%.cpp $(CUHS) $(ALL_H)
$(HIP_HOST_CC_CMD) -o $@ -c $< -I$(OBJ_DIR)
# libgpu building
$(LIB_DIR)/libgpu.a: $(OBJS)
$(AR) -crs $@ $(OBJS)
echo "export HIP_PLATFORM := $(HIP_PLATFORM)\n$(HIP_LIBS_TARGET)" > 'Makefile.lammps'
# test app building
$(BIN_DIR)/hip_get_devices: ./geryon/ucl_get_devices.cpp $(ALL_H)
$(HIP_HOST_CC_CMD) -o $@ $< -DUCL_HIP $(MPI_LINK_OPTS)
clean:
-rm -f $(BIN_DIR)/hip_get_devices $(LIB_DIR)/libgpu.a $(OBJS) $(OBJ_DIR)/temp_* $(CUHS)

519
lib/gpu/geryon/hip_device.h Normal file
View File

@ -0,0 +1,519 @@
/* -----------------------------------------------------------------------
Copyright (2009) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the Simplified BSD License.
----------------------------------------------------------------------- */
#ifndef HIP_DEVICE
#define HIP_DEVICE
#include <hip/hip_runtime.h>
#include <unordered_map>
#include <string>
#include <vector>
#include <iostream>
#include "hip_macros.h"
#include "ucl_types.h"
namespace ucl_hip {
// --------------------------------------------------------------------------
// - COMMAND QUEUE STUFF
// --------------------------------------------------------------------------
typedef hipStream_t command_queue;
inline void ucl_sync(hipStream_t &stream) {
CU_SAFE_CALL(hipStreamSynchronize(stream));
}
struct NVDProperties {
int device_id;
std::string name;
int major;
int minor;
CUDA_INT_TYPE totalGlobalMem;
int multiProcessorCount;
int maxThreadsPerBlock;
int maxThreadsDim[3];
int maxGridSize[3];
int sharedMemPerBlock;
int totalConstantMemory;
int SIMDWidth;
int memPitch;
int regsPerBlock;
int clockRate;
int textureAlign;
int kernelExecTimeoutEnabled;
int integrated;
int canMapHostMemory;
int concurrentKernels;
int ECCEnabled;
int computeMode;
};
/// Class for looking at device properties
/** \note Calls to change the device outside of the class results in incorrect
* behavior
* \note There is no error checking for indexing past the number of devices **/
class UCL_Device {
public:
/// Collect properties for every GPU on the node
/** \note You must set the active GPU with set() before using the device **/
inline UCL_Device();
inline ~UCL_Device();
/// Returns 1 (For compatibility with OpenCL)
inline int num_platforms() { return 1; }
/// Return a string with name and info of the current platform
inline std::string platform_name()
{ return "HIP platform"; }
/// Delete any contexts/data and set the platform number to be used
inline int set_platform(const int pid);
/// Return the number of devices that support CUDA
inline int num_devices() { return _properties.size(); }
/// Set the CUDA device to the specified device number
/** A context and default command queue will be created for the device
* Returns UCL_SUCCESS if successful or UCL_ERROR if the device could not
* be allocated for use. clear() is called to delete any contexts and
* associated data from previous calls to set(). **/
inline int set(int num);
/// Delete any context and associated data stored from a call to set()
inline void clear();
/// Get the current device number
inline int device_num() { return _device; }
/// Returns the default stream for the current device
inline command_queue & cq() { return cq(0); }
/// Returns the stream indexed by i
inline command_queue & cq(const int i) { return _cq[i]; }
/// Block until all commands in the default stream have completed
inline void sync() { sync(0); }
/// Block until all commands in the specified stream have completed
inline void sync(const int i) { ucl_sync(cq(i)); }
/// Get the number of command queues currently available on device
inline int num_queues()
{ return _cq.size(); }
/// Add a stream for device computations
inline void push_command_queue() {
_cq.push_back(hipStream_t());
CU_SAFE_CALL(hipStreamCreateWithFlags(&_cq.back(),0));
}
/// Remove a stream for device computations
/** \note You cannot delete the default stream **/
inline void pop_command_queue() {
if (_cq.size()<2) return;
CU_SAFE_CALL_NS(hipStreamDestroy(_cq.back()));
_cq.pop_back();
}
/// Set the default command queue (by default this is the null stream)
/** \param i index of the command queue (as added by push_command_queue())
If i is 0, the default command queue is set to the null stream **/
inline void set_command_queue(const int i) {
if (i==0) _cq[0]=0;
else _cq[0]=_cq[i];
}
/// Get the current CUDA device name
inline std::string name() { return name(_device); }
/// Get the CUDA device name
inline std::string name(const int i)
{ return std::string(_properties[i].name); }
/// Get a string telling the type of the current device
inline std::string device_type_name() { return device_type_name(_device); }
/// Get a string telling the type of the device
inline std::string device_type_name(const int i) { return "GPU"; }
/// Get current device type (UCL_CPU, UCL_GPU, UCL_ACCELERATOR, UCL_DEFAULT)
inline int device_type() { return device_type(_device); }
/// Get device type (UCL_CPU, UCL_GPU, UCL_ACCELERATOR, UCL_DEFAULT)
inline int device_type(const int i) { return UCL_GPU; }
/// Returns true if host memory is efficiently addressable from device
inline bool shared_memory() { return shared_memory(_device); }
/// Returns true if host memory is efficiently addressable from device
inline bool shared_memory(const int i) { return device_type(i)==UCL_CPU; }
/// Returns true if double precision is support for the current device
inline bool double_precision() { return double_precision(_device); }
/// Returns true if double precision is support for the device
inline bool double_precision(const int i) {return arch(i)>=1.3;}
/// Get the number of compute units on the current device
inline unsigned cus() { return cus(_device); }
/// Get the number of compute units
inline unsigned cus(const int i)
{ return _properties[i].multiProcessorCount; }
/// Get the number of cores in the current device
inline unsigned cores() { return cores(_device); }
/// Get the number of cores
inline unsigned cores(const int i)
{ if (arch(i)<2.0) return _properties[i].multiProcessorCount*8;
else if (arch(i)<2.1) return _properties[i].multiProcessorCount*32;
else if (arch(i)<3.0) return _properties[i].multiProcessorCount*48;
else return _properties[i].multiProcessorCount*192; }
/// Get the gigabytes of global memory in the current device
inline double gigabytes() { return gigabytes(_device); }
/// Get the gigabytes of global memory
inline double gigabytes(const int i)
{ return static_cast<double>(_properties[i].totalGlobalMem)/1073741824; }
/// Get the bytes of global memory in the current device
inline size_t bytes() { return bytes(_device); }
/// Get the bytes of global memory
inline size_t bytes(const int i) { return _properties[i].totalGlobalMem; }
// Get the gigabytes of free memory in the current device
inline double free_gigabytes() { return free_gigabytes(_device); }
// Get the gigabytes of free memory
inline double free_gigabytes(const int i)
{ return static_cast<double>(free_bytes(i))/1073741824; }
// Get the bytes of free memory in the current device
inline size_t free_bytes() { return free_bytes(_device); }
// Get the bytes of free memory
inline size_t free_bytes(const int i) {
CUDA_INT_TYPE dfree, dtotal;
CU_SAFE_CALL_NS(hipMemGetInfo(&dfree, &dtotal));
return static_cast<size_t>(dfree);
}
/// Return the GPGPU compute capability for current device
inline double arch() { return arch(_device); }
/// Return the GPGPU compute capability
inline double arch(const int i)
{ return static_cast<double>(_properties[i].minor)/10+_properties[i].major;}
/// Clock rate in GHz for current device
inline double clock_rate() { return clock_rate(_device); }
/// Clock rate in GHz
inline double clock_rate(const int i)
{ return _properties[i].clockRate*1e-6;}
/// Get the maximum number of threads per block
inline size_t group_size() { return group_size(_device); }
/// Get the maximum number of threads per block
inline size_t group_size(const int i)
{ return _properties[i].maxThreadsPerBlock; }
/// Return the maximum memory pitch in bytes for current device
inline size_t max_pitch() { return max_pitch(_device); }
/// Return the maximum memory pitch in bytes
inline size_t max_pitch(const int i) { return _properties[i].memPitch; }
/// Returns false if accelerator cannot be shared by multiple processes
/** If it cannot be determined, true is returned **/
inline bool sharing_supported() { return sharing_supported(_device); }
/// Returns false if accelerator cannot be shared by multiple processes
/** If it cannot be determined, true is returned **/
inline bool sharing_supported(const int i)
{ return (_properties[i].computeMode == hipComputeModeDefault); }
/// True if splitting device into equal subdevices supported
inline bool fission_equal()
{ return fission_equal(_device); }
/// True if splitting device into equal subdevices supported
inline bool fission_equal(const int i)
{ return false; }
/// True if splitting device into subdevices by specified counts supported
inline bool fission_by_counts()
{ return fission_by_counts(_device); }
/// True if splitting device into subdevices by specified counts supported
inline bool fission_by_counts(const int i)
{ return false; }
/// True if splitting device into subdevices by affinity domains supported
inline bool fission_by_affinity()
{ return fission_by_affinity(_device); }
/// True if splitting device into subdevices by affinity domains supported
inline bool fission_by_affinity(const int i)
{ return false; }
/// Maximum number of subdevices allowed from device fission
inline int max_sub_devices()
{ return max_sub_devices(_device); }
/// Maximum number of subdevices allowed from device fission
inline int max_sub_devices(const int i)
{ return 0; }
/// List all devices along with all properties
inline void print_all(std::ostream &out);
/// Select the platform that has accelerators (for compatibility with OpenCL)
inline int set_platform_accelerator(int pid=-1) { return UCL_SUCCESS; }
inline int load_module(const void* program, hipModule_t& module, std::string *log=NULL){
auto it = _loaded_modules.emplace(program, hipModule_t());
if(!it.second){
module = it.first->second;
return UCL_SUCCESS;
}
const unsigned int num_opts=2;
hipJitOption options[num_opts];
void *values[num_opts];
// set up size of compilation log buffer
options[0] = hipJitOptionInfoLogBufferSizeBytes;
values[0] = (void *)(int)10240;
// set up pointer to the compilation log buffer
options[1] = hipJitOptionInfoLogBuffer;
char clog[10240] = { 0 };
values[1] = clog;
hipError_t err=hipModuleLoadDataEx(&module,program,num_opts, options,(void **)values);
if (log!=NULL)
*log=std::string(clog);
if (err != hipSuccess) {
#ifndef UCL_NO_EXIT
std::cerr << std::endl
<< "----------------------------------------------------------\n"
<< " UCL Error: Error compiling PTX Program...\n"
<< "----------------------------------------------------------\n";
std::cerr << log << std::endl;
#endif
_loaded_modules.erase(it.first);
return UCL_COMPILE_ERROR;
}
it.first->second = module;
return UCL_SUCCESS;
}
private:
std::unordered_map<const void*, hipModule_t> _loaded_modules;
int _device, _num_devices;
std::vector<NVDProperties> _properties;
std::vector<hipStream_t> _cq;
hipDevice_t _cu_device;
};
// Grabs the properties for all devices
UCL_Device::UCL_Device() {
CU_SAFE_CALL_NS(hipInit(0));
CU_SAFE_CALL_NS(hipGetDeviceCount(&_num_devices));
for (int i=0; i<_num_devices; ++i) {
hipDevice_t dev;
CU_SAFE_CALL_NS(hipDeviceGet(&dev,i));
int major, minor;
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, dev));
if (major==9999)
continue;
NVDProperties prop;
prop.device_id = i;
prop.major=major;
prop.minor=minor;
char namecstr[1024];
CU_SAFE_CALL_NS(hipDeviceGetName(namecstr,1024,dev));
prop.name=namecstr;
CU_SAFE_CALL_NS(hipDeviceTotalMem(&prop.totalGlobalMem,dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.multiProcessorCount, hipDeviceAttributeMultiprocessorCount, dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsPerBlock, hipDeviceAttributeMaxThreadsPerBlock, dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsDim[0], hipDeviceAttributeMaxBlockDimX, dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsDim[1], hipDeviceAttributeMaxBlockDimY, dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsDim[2], hipDeviceAttributeMaxBlockDimZ, dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxGridSize[0], hipDeviceAttributeMaxGridDimX, dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxGridSize[1], hipDeviceAttributeMaxGridDimY, dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxGridSize[2], hipDeviceAttributeMaxGridDimZ, dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.sharedMemPerBlock, hipDeviceAttributeMaxSharedMemoryPerBlock, dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.totalConstantMemory, hipDeviceAttributeTotalConstantMemory, dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.SIMDWidth, hipDeviceAttributeWarpSize, dev));
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.memPitch, CU_DEVICE_ATTRIBUTE_MAX_PITCH, dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.regsPerBlock, hipDeviceAttributeMaxRegistersPerBlock, dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.clockRate, hipDeviceAttributeClockRate, dev));
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.textureAlign, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, dev));
//#if CUDA_VERSION >= 2020
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.kernelExecTimeoutEnabled, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.integrated, hipDeviceAttributeIntegrated, dev));
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.canMapHostMemory, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.computeMode, hipDeviceAttributeComputeMode,dev));
//#endif
//#if CUDA_VERSION >= 3010
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.concurrentKernels, hipDeviceAttributeConcurrentKernels, dev));
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.ECCEnabled, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, dev));
//#endif
_properties.push_back(prop);
}
_device=-1;
_cq.push_back(hipStream_t());
_cq.back()=0;
}
UCL_Device::~UCL_Device() {
clear();
}
int UCL_Device::set_platform(const int pid) {
clear();
#ifdef UCL_DEBUG
assert(pid<num_platforms());
#endif
return UCL_SUCCESS;
}
// Set the CUDA device to the specified device number
int UCL_Device::set(int num) {
clear();
_device=_properties[num].device_id;
hipError_t err=hipDeviceGet(&_cu_device,_device);
if (err!=hipSuccess) {
#ifndef UCL_NO_EXIT
std::cerr << "UCL Error: Could not access accelerator number " << num
<< " for use.\n";
UCL_GERYON_EXIT;
#endif
return UCL_ERROR;
}
//hipError_t err=hipCtxCreate(&_context,0,_cu_device); deprecated and unnecessary
err=hipSetDevice(_device);
if (err!=hipSuccess) {
#ifndef UCL_NO_EXIT
std::cerr << "UCL Error: Could not set accelerator number " << num
<< " for use.\n";
UCL_GERYON_EXIT;
#endif
return UCL_ERROR;
}
return UCL_SUCCESS;
}
void UCL_Device::clear() {
if (_device>-1) {
for (int i=1; i<num_queues(); i++) pop_command_queue();
CU_SAFE_CALL_NS(hipDeviceReset());
}
_device=-1;
}
// List all devices along with all properties
void UCL_Device::print_all(std::ostream &out) {
//#if CUDA_VERSION >= 2020
int driver_version;
hipDriverGetVersion(&driver_version);
out << "Driver Version: "
<< driver_version/1000 << "." << driver_version%100
<< std::endl;
//#endif
if (num_devices() == 0)
out << "There is no device supporting HIP\n";
for (int i=0; i<num_devices(); ++i) {
out << "\nDevice " << i << ": \"" << name(i) << "\"\n";
out << " Type of device: "
<< device_type_name(i).c_str() << std::endl;
out << " Compute capability: "
<< arch(i) << std::endl;
out << " Double precision support: ";
if (double_precision(i))
out << "Yes\n";
else
out << "No\n";
out << " Total amount of global memory: "
<< gigabytes(i) << " GB\n";
//#if CUDA_VERSION >= 2000
out << " Number of compute units/multiprocessors: "
<< _properties[i].multiProcessorCount << std::endl;
out << " Number of cores: "
<< cores(i) << std::endl;
//#endif
out << " Total amount of constant memory: "
<< _properties[i].totalConstantMemory << " bytes\n";
out << " Total amount of local/shared memory per block: "
<< _properties[i].sharedMemPerBlock << " bytes\n";
out << " Total number of registers available per block: "
<< _properties[i].regsPerBlock << std::endl;
out << " Warp size: "
<< _properties[i].SIMDWidth << std::endl;
out << " Maximum number of threads per block: "
<< _properties[i].maxThreadsPerBlock << std::endl;
out << " Maximum group size (# of threads per block) "
<< _properties[i].maxThreadsDim[0] << " x "
<< _properties[i].maxThreadsDim[1] << " x "
<< _properties[i].maxThreadsDim[2] << std::endl;
out << " Maximum item sizes (# threads for each dim) "
<< _properties[i].maxGridSize[0] << " x "
<< _properties[i].maxGridSize[1] << " x "
<< _properties[i].maxGridSize[2] << std::endl;
//out << " Maximum memory pitch: "
// << max_pitch(i) << " bytes\n";
//out << " Texture alignment: "
// << _properties[i].textureAlign << " bytes\n";
out << " Clock rate: "
<< clock_rate(i) << " GHz\n";
//#if CUDA_VERSION >= 2020
//out << " Run time limit on kernels: ";
//if (_properties[i].kernelExecTimeoutEnabled)
// out << "Yes\n";
//else
// out << "No\n";
out << " Integrated: ";
if (_properties[i].integrated)
out << "Yes\n";
else
out << "No\n";
//out << " Support host page-locked memory mapping: ";
//if (_properties[i].canMapHostMemory)
// out << "Yes\n";
//else
// out << "No\n";
out << " Compute mode: ";
if (_properties[i].computeMode == hipComputeModeDefault)
out << "Default\n"; // multiple threads can use device
//#if CUDA_VERSION >= 8000
// else if (_properties[i].computeMode == hipComputeModeExclusiveProcess)
//#else
else if (_properties[i].computeMode == hipComputeModeExclusive)
//#endif
out << "Exclusive\n"; // only thread can use device
else if (_properties[i].computeMode == hipComputeModeProhibited)
out << "Prohibited\n"; // no thread can use device
//#if CUDART_VERSION >= 4000
else if (_properties[i].computeMode == hipComputeModeExclusiveProcess)
out << "Exclusive Process\n"; // multiple threads 1 process
//#endif
else
out << "Unknown\n";
//#endif
//#if CUDA_VERSION >= 3010
out << " Concurrent kernel execution: ";
if (_properties[i].concurrentKernels)
out << "Yes\n";
else
out << "No\n";
//out << " Device has ECC support enabled: ";
//if (_properties[i].ECCEnabled)
// out << "Yes\n";
//else
// out << "No\n";
//#endif
}
}
}
#endif

298
lib/gpu/geryon/hip_kernel.h Normal file
View File

@ -0,0 +1,298 @@
/* -----------------------------------------------------------------------
Copyright (2010) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the Simplified BSD License.
----------------------------------------------------------------------- */
#ifndef HIP_KERNEL
#define HIP_KERNEL
#include <hip/hip_runtime.h>
#include "hip_device.h"
#include <fstream>
#include <string>
#include <iostream>
namespace ucl_hip {
class UCL_Texture;
template <class numtyp> class UCL_D_Vec;
template <class numtyp> class UCL_D_Mat;
template <class hosttype, class devtype> class UCL_Vector;
template <class hosttype, class devtype> class UCL_Matrix;
#define UCL_MAX_KERNEL_ARGS 256
/// Class storing 1 or more kernel functions from a single string or file
class UCL_Program {
UCL_Device* _device_ptr;
public:
inline UCL_Program(UCL_Device &device) { _device_ptr = &device; _cq=device.cq(); }
inline UCL_Program(UCL_Device &device, const void *program,
const char *flags="", std::string *log=NULL) {
_device_ptr = &device; _cq=device.cq();
init(device);
load_string(program,flags,log);
}
inline ~UCL_Program() {}
/// Initialize the program with a device
inline void init(UCL_Device &device) { _device_ptr = &device; _cq=device.cq(); }
/// Clear any data associated with program
/** \note Must call init() after each clear **/
inline void clear() { }
/// Load a program from a file and compile with flags
inline int load(const char *filename, const char *flags="", std::string *log=NULL) {
std::ifstream in(filename);
if (!in || in.is_open()==false) {
#ifndef UCL_NO_EXIT
std::cerr << "UCL Error: Could not open kernel file: "
<< filename << std::endl;
UCL_GERYON_EXIT;
#endif
return UCL_FILE_NOT_FOUND;
}
std::string program((std::istreambuf_iterator<char>(in)),
std::istreambuf_iterator<char>());
in.close();
return load_string(program.c_str(),flags,log);
}
/// Load a program from a string and compile with flags
inline int load_string(const void *program, const char *flags="", std::string *log=NULL) {
return _device_ptr->load_module(program, _module, log);
}
friend class UCL_Kernel;
private:
hipModule_t _module;
hipStream_t _cq;
friend class UCL_Texture;
};
/// Class for dealing with CUDA Driver kernels
class UCL_Kernel {
public:
UCL_Kernel() : _dimensions(1), _num_args(0) {
_num_blocks[0]=0;
}
UCL_Kernel(UCL_Program &program, const char *function) :
_dimensions(1), _num_args(0) {
_num_blocks[0]=0;
set_function(program,function);
_cq=program._cq;
}
~UCL_Kernel() {}
/// Clear any function associated with the kernel
inline void clear() { }
/// Get the kernel function from a program
/** \ret UCL_ERROR_FLAG (UCL_SUCCESS, UCL_FILE_NOT_FOUND, UCL_ERROR) **/
inline int set_function(UCL_Program &program, const char *function) {
hipError_t err=hipModuleGetFunction(&_kernel,program._module,function);
if (err!=hipSuccess) {
#ifndef UCL_NO_EXIT
std::cerr << "UCL Error: Could not find function: " << function
<< " in program.\n";
UCL_GERYON_EXIT;
#endif
return UCL_FUNCTION_NOT_FOUND;
}
_cq=program._cq;
return UCL_SUCCESS;
}
/// Set the kernel argument.
/** If not a device pointer, this must be repeated each time the argument
* changes
* \note To set kernel parameter i (i>0), parameter i-1 must be set **/
template <class dtype>
inline void set_arg(const unsigned index, const dtype * const arg) {
if (index==_num_args)
add_arg(arg);
else if (index<_num_args){
assert(0==1); // not implemented
}
else
assert(0==1); // Must add kernel parameters in sequential order
}
/// Set a geryon container as a kernel argument.
template <class numtyp>
inline void set_arg(const UCL_D_Vec<numtyp> * const arg)
{ set_arg(&arg->begin()); }
/// Set a geryon container as a kernel argument.
template <class numtyp>
inline void set_arg(const UCL_D_Mat<numtyp> * const arg)
{ set_arg(&arg->begin()); }
/// Set a geryon container as a kernel argument.
template <class hosttype, class devtype>
inline void set_arg(const UCL_Vector<hosttype, devtype> * const arg)
{ set_arg(&arg->device.begin()); }
/// Set a geryon container as a kernel argument.
template <class hosttype, class devtype>
inline void set_arg(const UCL_Matrix<hosttype, devtype> * const arg)
{ set_arg(&arg->device.begin()); }
/// Add a kernel argument.
inline void add_arg(const hipDeviceptr_t* const arg) {
add_arg<void*>((void**)arg);
}
/// Add a kernel argument.
template <class dtype>
inline void add_arg(const dtype* const arg) {
const auto old_size = _hip_kernel_args.size();
const auto aligned_size = (old_size+alignof(dtype)-1) & ~(alignof(dtype)-1);
const auto arg_size = sizeof(dtype);
_hip_kernel_args.resize(aligned_size + arg_size);
*((dtype*)(&_hip_kernel_args[aligned_size])) = *arg;
_num_args++;
if (_num_args>UCL_MAX_KERNEL_ARGS) assert(0==1);
}
/// Add a geryon container as a kernel argument.
template <class numtyp>
inline void add_arg(const UCL_D_Vec<numtyp> * const arg)
{ add_arg(&arg->begin()); }
/// Add a geryon container as a kernel argument.
template <class numtyp>
inline void add_arg(const UCL_D_Mat<numtyp> * const arg)
{ add_arg(&arg->begin()); }
/// Add a geryon container as a kernel argument.
template <class hosttype, class devtype>
inline void add_arg(const UCL_Vector<hosttype, devtype> * const arg)
{ add_arg(&arg->device.begin()); }
/// Add a geryon container as a kernel argument.
template <class hosttype, class devtype>
inline void add_arg(const UCL_Matrix<hosttype, devtype> * const arg)
{ add_arg(&arg->device.begin()); }
/// Set the number of thread blocks and the number of threads in each block
/** \note This should be called before any arguments have been added
\note The default command queue is used for the kernel execution **/
inline void set_size(const size_t num_blocks, const size_t block_size) {
_dimensions=1;
_num_blocks[0]=num_blocks;
_num_blocks[1]=1;
_num_blocks[2]=1;
_block_size[0]=block_size;
_block_size[1]=1;
_block_size[2]=1;
}
/// Set the number of thread blocks and the number of threads in each block
/** \note This should be called before any arguments have been added
\note The default command queue for the kernel is changed to cq **/
inline void set_size(const size_t num_blocks, const size_t block_size,
command_queue &cq)
{ _cq=cq; set_size(num_blocks,block_size); }
/// Set the number of thread blocks and the number of threads in each block
/** \note This should be called before any arguments have been added
\note The default command queue is used for the kernel execution **/
inline void set_size(const size_t num_blocks_x, const size_t num_blocks_y,
const size_t block_size_x, const size_t block_size_y) {
_dimensions=2;
_num_blocks[0]=num_blocks_x;
_num_blocks[1]=num_blocks_y;
_num_blocks[2]=1;
_block_size[0]=block_size_x;
_block_size[1]=block_size_y;
_block_size[2]=1;
}
/// Set the number of thread blocks and the number of threads in each block
/** \note This should be called before any arguments have been added
\note The default command queue for the kernel is changed to cq **/
inline void set_size(const size_t num_blocks_x, const size_t num_blocks_y,
const size_t block_size_x, const size_t block_size_y,
command_queue &cq)
{_cq=cq; set_size(num_blocks_x, num_blocks_y, block_size_x, block_size_y);}
/// Set the number of thread blocks and the number of threads in each block
/** \note This should be called before any arguments have been added
\note The default command queue is used for the kernel execution **/
inline void set_size(const size_t num_blocks_x, const size_t num_blocks_y,
const size_t block_size_x,
const size_t block_size_y, const size_t block_size_z) {
_dimensions=2;
_num_blocks[0]=num_blocks_x;
_num_blocks[1]=num_blocks_y;
_num_blocks[2]=1;
_block_size[0]=block_size_x;
_block_size[1]=block_size_y;
_block_size[2]=block_size_z;
}
/// Set the number of thread blocks and the number of threads in each block
/** \note This should be called before any arguments have been added
\note The default command queue is used for the kernel execution **/
inline void set_size(const size_t num_blocks_x, const size_t num_blocks_y,
const size_t block_size_x, const size_t block_size_y,
const size_t block_size_z, command_queue &cq) {
_cq=cq;
set_size(num_blocks_x, num_blocks_y, block_size_x, block_size_y,
block_size_z);
}
/// Run the kernel in the default command queue
inline void run() {
size_t args_size = _hip_kernel_args.size();
void *config[] = {
HIP_LAUNCH_PARAM_BUFFER_POINTER, (void*)_hip_kernel_args.data(),
HIP_LAUNCH_PARAM_BUFFER_SIZE, &args_size,
HIP_LAUNCH_PARAM_END
};
const auto res = hipModuleLaunchKernel(_kernel,_num_blocks[0],_num_blocks[1],
_num_blocks[2],_block_size[0],_block_size[1],
_block_size[2],0,_cq, NULL, config);
CU_SAFE_CALL(res);
//#endif
}
/// Clear any arguments associated with the kernel
inline void clear_args() {
_num_args=0;
_hip_kernel_args.clear();
}
/// Return the default command queue/stream associated with this data
inline command_queue & cq() { return _cq; }
/// Change the default command queue associated with matrix
inline void cq(command_queue &cq_in) { _cq=cq_in; }
#include "ucl_arg_kludge.h"
private:
hipFunction_t _kernel;
hipStream_t _cq;
unsigned _dimensions;
unsigned _num_blocks[3];
unsigned _num_args;
friend class UCL_Texture;
unsigned _block_size[3];
std::vector<char> _hip_kernel_args;
};
} // namespace
#endif

View File

@ -0,0 +1,83 @@
#ifndef HIP_MACROS_H
#define HIP_MACROS_H
#include <cstdio>
#include <cassert>
#include <hip/hip_runtime.h>
//#if CUDA_VERSION >= 3020
#define CUDA_INT_TYPE size_t
//#else
//#define CUDA_INT_TYPE unsigned
//#endif
#ifdef MPI_GERYON
#include "mpi.h"
#define NVD_GERYON_EXIT do { \
int is_final; \
MPI_Finalized(&is_final); \
if (!is_final) \
MPI_Abort(MPI_COMM_WORLD,-1); \
} while(0)
#else
#define NVD_GERYON_EXIT assert(0==1)
#endif
#ifndef UCL_GERYON_EXIT
#define UCL_GERYON_EXIT NVD_GERYON_EXIT
#endif
#ifdef UCL_DEBUG
#define UCL_SYNC_DEBUG
#define UCL_DESTRUCT_CHECK
#endif
#ifndef UCL_NO_API_CHECK
#define CU_SAFE_CALL_NS( call ) do { \
hipError_t err = call; \
if( hipSuccess != err) { \
fprintf(stderr, "HIP runtime error %d in call at file '%s' in line %i.\n", \
err, __FILE__, __LINE__ ); \
NVD_GERYON_EXIT; \
} } while (0)
#ifdef UCL_SYNC_DEBUG
#define CU_SAFE_CALL( call ) do { \
CU_SAFE_CALL_NS( call ); \
hipError_t err=hipCtxSynchronize(); \
if( hipSuccess != err) { \
fprintf(stderr, "HIP runtime error %d in file '%s' in line %i.\n", \
err, __FILE__, __LINE__ ); \
NVD_GERYON_EXIT; \
} } while (0)
#else
#define CU_SAFE_CALL( call ) CU_SAFE_CALL_NS( call )
#endif
#else // not DEBUG
// void macros for performance reasons
#define CU_SAFE_CALL_NS( call ) call
#define CU_SAFE_CALL( call) call
#endif
#ifdef UCL_DESTRUCT_CHECK
#define CU_DESTRUCT_CALL( call) CU_SAFE_CALL( call)
#define CU_DESTRUCT_CALL_NS( call) CU_SAFE_CALL_NS( call)
#else
#define CU_DESTRUCT_CALL( call) call
#define CU_DESTRUCT_CALL_NS( call) call
#endif
#endif

43
lib/gpu/geryon/hip_mat.h Normal file
View File

@ -0,0 +1,43 @@
/* -----------------------------------------------------------------------
Copyright (2010) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the Simplified BSD License.
----------------------------------------------------------------------- */
/*! \file */
#ifndef HIP_MAT_H
#define HIP_MAT_H
#include <hip/hip_runtime.h>
#include "hip_memory.h"
/// Namespace for CUDA Driver routines
namespace ucl_hip {
#define _UCL_MAT_ALLOW
#define _UCL_DEVICE_PTR_MAT
#include "ucl_basemat.h"
#include "ucl_h_vec.h"
#include "ucl_h_mat.h"
#include "ucl_d_vec.h"
#include "ucl_d_mat.h"
#include "ucl_s_obj_help.h"
#include "ucl_vector.h"
#include "ucl_matrix.h"
#undef _UCL_DEVICE_PTR_MAT
#undef _UCL_MAT_ALLOW
#define UCL_COPY_ALLOW
#include "ucl_copy.h"
#undef UCL_COPY_ALLOW
#define UCL_PRINT_ALLOW
#include "ucl_print.h"
#undef UCL_PRINT_ALLOW
} // namespace ucl_cudadr
#endif

279
lib/gpu/geryon/hip_memory.h Normal file
View File

@ -0,0 +1,279 @@
/* -----------------------------------------------------------------------
Copyright (2010) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the Simplified BSD License.
----------------------------------------------------------------------- */
#ifndef HIP_MEMORY_H
#define HIP_MEMORY_H
#include <hip/hip_runtime.h>
#include <iostream>
#include <cassert>
#include <cstring>
#include "hip_macros.h"
#include "hip_device.h"
#include "ucl_types.h"
namespace ucl_hip {
// --------------------------------------------------------------------------
// - API Specific Types
// --------------------------------------------------------------------------
//typedef dim3 ucl_kernel_dim;
#ifdef __HIP_PLATFORM_NVCC__
typedef enum hipArray_Format {
HIP_AD_FORMAT_UNSIGNED_INT8 = 0x01,
HIP_AD_FORMAT_UNSIGNED_INT16 = 0x02,
HIP_AD_FORMAT_UNSIGNED_INT32 = 0x03,
HIP_AD_FORMAT_SIGNED_INT8 = 0x08,
HIP_AD_FORMAT_SIGNED_INT16 = 0x09,
HIP_AD_FORMAT_SIGNED_INT32 = 0x0a,
HIP_AD_FORMAT_HALF = 0x10,
HIP_AD_FORMAT_FLOAT = 0x20
}hipArray_Format;
#endif
// --------------------------------------------------------------------------
// - API SPECIFIC DEVICE POINTERS
// --------------------------------------------------------------------------
typedef hipDeviceptr_t device_ptr;
// --------------------------------------------------------------------------
// - HOST MEMORY ALLOCATION ROUTINES
// --------------------------------------------------------------------------
template <class mat_type, class copy_type>
inline int _host_alloc(mat_type &mat, copy_type &cm, const size_t n,
const enum UCL_MEMOPT kind, const enum UCL_MEMOPT kind2){
hipError_t err=hipSuccess;
if (kind==UCL_NOT_PINNED)
*(mat.host_ptr())=(typename mat_type::data_type*)malloc(n);
else if (kind==UCL_WRITE_ONLY)
err=hipHostMalloc((void **)mat.host_ptr(),n,hipHostMallocWriteCombined);
else
err=hipHostMalloc((void **)mat.host_ptr(),n,hipHostMallocDefault);
if (err!=hipSuccess || *(mat.host_ptr())==NULL)
return UCL_MEMORY_ERROR;
mat.cq()=cm.cq();
return UCL_SUCCESS;
}
template <class mat_type>
inline int _host_alloc(mat_type &mat, UCL_Device &dev, const size_t n,
const enum UCL_MEMOPT kind, const enum UCL_MEMOPT kind2){
hipError_t err=hipSuccess;
if (kind==UCL_NOT_PINNED)
*(mat.host_ptr())=(typename mat_type::data_type*)malloc(n);
else if (kind==UCL_WRITE_ONLY)
err=hipHostMalloc((void **)mat.host_ptr(),n,hipHostMallocWriteCombined);
else
err=hipHostMalloc((void **)mat.host_ptr(),n,hipHostMallocDefault);
if (err!=hipSuccess || *(mat.host_ptr())==NULL)
return UCL_MEMORY_ERROR;
mat.cq()=dev.cq();
return UCL_SUCCESS;
}
template <class mat_type>
inline void _host_free(mat_type &mat) {
if (mat.kind()==UCL_VIEW)
return;
else if (mat.kind()!=UCL_NOT_PINNED)
CU_DESTRUCT_CALL(hipHostFree(mat.begin()));
else
free(mat.begin());
}
template <class mat_type>
inline int _host_resize(mat_type &mat, const size_t n) {
_host_free(mat);
hipError_t err=hipSuccess;
if (mat.kind()==UCL_NOT_PINNED)
*(mat.host_ptr())=(typename mat_type::data_type*)malloc(n);
else if (mat.kind()==UCL_WRITE_ONLY)
err=hipHostMalloc((void **)mat.host_ptr(),n,hipHostMallocWriteCombined);
else
err=hipHostMalloc((void **)mat.host_ptr(),n,hipHostMallocDefault);
if (err!=hipSuccess || *(mat.host_ptr())==NULL)
return UCL_MEMORY_ERROR;
return UCL_SUCCESS;
}
// --------------------------------------------------------------------------
// - DEVICE MEMORY ALLOCATION ROUTINES
// --------------------------------------------------------------------------
template <class mat_type, class copy_type>
inline int _device_alloc(mat_type &mat, copy_type &cm, const size_t n,
const enum UCL_MEMOPT kind) {
hipError_t err=hipMalloc((void**)&mat.cbegin(),n);
if (err!=hipSuccess)
return UCL_MEMORY_ERROR;
mat.cq()=cm.cq();
return UCL_SUCCESS;
}
template <class mat_type>
inline int _device_alloc(mat_type &mat, UCL_Device &dev, const size_t n,
const enum UCL_MEMOPT kind) {
hipError_t err=hipMalloc((void**)&mat.cbegin(),n);
if (err!=hipSuccess)
return UCL_MEMORY_ERROR;
mat.cq()=dev.cq();
return UCL_SUCCESS;
}
template <class mat_type, class copy_type>
inline int _device_alloc(mat_type &mat, copy_type &cm, const size_t rows,
const size_t cols, size_t &pitch,
const enum UCL_MEMOPT kind) {
hipError_t err;
size_t upitch;
err=hipMallocPitch((void**)&mat.cbegin(),&upitch,
cols*sizeof(typename mat_type::data_type),rows);
pitch=static_cast<size_t>(upitch);
if (err!=hipSuccess)
return UCL_MEMORY_ERROR;
mat.cq()=cm.cq();
return UCL_SUCCESS;
}
template <class mat_type, class copy_type>
inline int _device_alloc(mat_type &mat, UCL_Device &d, const size_t rows,
const size_t cols, size_t &pitch,
const enum UCL_MEMOPT kind) {
hipError_t err;
size_t upitch;
err=hipMallocPitch((void**)&mat.cbegin(),&upitch,
cols*sizeof(typename mat_type::data_type),rows);
pitch=static_cast<size_t>(upitch);
if (err!=hipSuccess)
return UCL_MEMORY_ERROR;
mat.cq()=d.cq();
return UCL_SUCCESS;
}
template <class mat_type>
inline void _device_free(mat_type &mat) {
if (mat.kind()!=UCL_VIEW){
CU_DESTRUCT_CALL(hipFree((void*)mat.cbegin()));
}
}
template <class mat_type>
inline int _device_resize(mat_type &mat, const size_t n) {
_device_free(mat);
hipError_t err=hipMalloc((void**)&mat.cbegin(),n);
if (err!=hipSuccess)
return UCL_MEMORY_ERROR;
return UCL_SUCCESS;
}
template <class mat_type>
inline int _device_resize(mat_type &mat, const size_t rows,
const size_t cols, size_t &pitch) {
_device_free(mat);
hipError_t err;
size_t upitch;
err=hipMallocPitch((void**)&mat.cbegin(),&upitch,
cols*sizeof(typename mat_type::data_type),rows);
pitch=static_cast<size_t>(upitch);
if (err!=hipSuccess)
return UCL_MEMORY_ERROR;
return UCL_SUCCESS;
}
inline void _device_view(hipDeviceptr_t *ptr, hipDeviceptr_t &in) {
*ptr=in;
}
template <class numtyp>
inline void _device_view(hipDeviceptr_t *ptr, numtyp *in) {
*ptr=0;
}
inline void _device_view(hipDeviceptr_t *ptr, hipDeviceptr_t &in,
const size_t offset, const size_t numsize) {
*ptr=(hipDeviceptr_t)(((char*)in)+offset*numsize);
}
template <class numtyp>
inline void _device_view(hipDeviceptr_t *ptr, numtyp *in,
const size_t offset, const size_t numsize) {
*ptr=0;
}
// --------------------------------------------------------------------------
// - DEVICE IMAGE ALLOCATION ROUTINES
// --------------------------------------------------------------------------
template <class mat_type, class copy_type>
inline void _device_image_alloc(mat_type &mat, copy_type &cm, const size_t rows,
const size_t cols) {
assert(0==1);
}
template <class mat_type, class copy_type>
inline void _device_image_alloc(mat_type &mat, UCL_Device &d, const size_t rows,
const size_t cols) {
assert(0==1);
}
template <class mat_type>
inline void _device_image_free(mat_type &mat) {
assert(0==1);
}
// --------------------------------------------------------------------------
// - ZERO ROUTINES
// --------------------------------------------------------------------------
inline void _host_zero(void *ptr, const size_t n) {
memset(ptr,0,n);
}
template <class mat_type>
inline void _device_zero(mat_type &mat, const size_t n, command_queue &cq) {
CU_SAFE_CALL(hipMemsetAsync((void*)mat.cbegin(),0,n,cq));
}
// --------------------------------------------------------------------------
// - MEMCPY ROUTINES
// --------------------------------------------------------------------------
template<class mat1, class mat2>
hipMemcpyKind _memcpy_kind(mat1 &dst, const mat2 &src){
assert(mat1::MEM_TYPE < 2 && mat2::MEM_TYPE < 2);
return (hipMemcpyKind)((1 - mat2::MEM_TYPE)*2 + (1 - mat1::MEM_TYPE));
}
template<class mat1, class mat2>
inline void ucl_mv_cpy(mat1 &dst, const mat2 &src, const size_t n) {
CU_SAFE_CALL(hipMemcpy((void*)dst.begin(), (void*)src.begin(), n, _memcpy_kind(dst, src)));
}
template<class mat1, class mat2>
inline void ucl_mv_cpy(mat1 &dst, const mat2 &src, const size_t n, hipStream_t &cq) {
CU_SAFE_CALL(hipMemcpyAsync((void*)dst.begin(), (void*)src.begin(), n, _memcpy_kind(dst, src), cq));
}
template<class mat1, class mat2>
inline void ucl_mv_cpy(mat1 &dst, const size_t dpitch, const mat2 &src,
const size_t spitch, const size_t cols,
const size_t rows) {
CU_SAFE_CALL(hipMemcpy2D((void*)dst.begin(), dpitch, (void*)src.begin(), spitch, cols, rows, _memcpy_kind(dst, src)));
}
template<class mat1, class mat2>
inline void ucl_mv_cpy(mat1 &dst, const size_t dpitch, const mat2 &src,
const size_t spitch, const size_t cols,
const size_t rows,hipStream_t &cq) {
CU_SAFE_CALL(hipMemcpy2DAsync((void*)dst.begin(), dpitch, (void*)src.begin(), spitch, cols, rows, _memcpy_kind(dst, src), cq));
}
} // namespace ucl_cudart
#endif

View File

@ -0,0 +1,113 @@
/* -----------------------------------------------------------------------
Copyright (2010) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the Simplified BSD License.
----------------------------------------------------------------------- */
#ifndef HIP_TEXTURE
#define HIP_TEXTURE
#include <hip/hip_runtime.h>
#include "hip_kernel.h"
#include "hip_mat.h"
namespace ucl_hip {
#ifdef __HIP_PLATFORM_NVCC__
inline hipError_t hipModuleGetTexRef(CUtexref* texRef, hipModule_t hmod, const char* name){
return hipCUResultTohipError(cuModuleGetTexRef(texRef, hmod, name));
}
inline hipError_t hipTexRefSetFormat(CUtexref tex, hipArray_Format fmt, int NumPackedComponents) {
return hipCUResultTohipError(cuTexRefSetFormat(tex, (CUarray_format)fmt, NumPackedComponents ));
}
inline hipError_t hipTexRefSetAddress(size_t* offset, CUtexref tex, hipDeviceptr_t devPtr, size_t size) {
return hipCUResultTohipError(cuTexRefSetAddress(offset, tex, devPtr, size));
}
#endif
/// Class storing a texture reference
class UCL_Texture {
public:
UCL_Texture() {}
~UCL_Texture() {}
/// Construct with a specified texture reference
inline UCL_Texture(UCL_Program &prog, const char *texture_name)
{ get_texture(prog,texture_name); }
/// Set the texture reference for this object
inline void get_texture(UCL_Program &prog, const char *texture_name)
{
#ifdef __HIP_PLATFORM_NVCC__
CU_SAFE_CALL(hipModuleGetTexRef(&_tex, prog._module, texture_name));
#else
size_t _global_var_size;
CU_SAFE_CALL(hipModuleGetGlobal(&_device_ptr_to_global_var, &_global_var_size, prog._module, texture_name));
#endif
}
/// Bind a float array where each fetch grabs a vector of length numel
template<class numtyp>
inline void bind_float(UCL_D_Vec<numtyp> &vec, const unsigned numel)
{ _bind_float(vec,numel); }
/// Bind a float array where each fetch grabs a vector of length numel
template<class numtyp>
inline void bind_float(UCL_D_Mat<numtyp> &vec, const unsigned numel)
{ _bind_float(vec,numel); }
/// Bind a float array where each fetch grabs a vector of length numel
template<class numtyp, class devtyp>
inline void bind_float(UCL_Vector<numtyp, devtyp> &vec, const unsigned numel)
{ _bind_float(vec.device,numel); }
/// Bind a float array where each fetch grabs a vector of length numel
template<class numtyp, class devtyp>
inline void bind_float(UCL_Matrix<numtyp, devtyp> &vec, const unsigned numel)
{ _bind_float(vec.device,numel); }
/// Unbind the texture reference from the memory allocation
inline void unbind() { }
/// Make a texture reference available to kernel
inline void allow(UCL_Kernel &kernel) {
//#if CUDA_VERSION < 4000
//CU_SAFE_CALL(cuParamSetTexRef(kernel._kernel, CU_PARAM_TR_DEFAULT, _tex));
//#endif
}
private:
#ifdef __HIP_PLATFORM_NVCC__
CUtexref _tex;
#else
void* _device_ptr_to_global_var;
#endif
friend class UCL_Kernel;
template<class mat_typ>
inline void _bind_float(mat_typ &vec, const unsigned numel) {
#ifdef UCL_DEBUG
assert(numel!=0 && numel<5);
#endif
#ifdef __HIP_PLATFORM_NVCC__
if (vec.element_size()==sizeof(float))
CU_SAFE_CALL(hipTexRefSetFormat(_tex, HIP_AD_FORMAT_FLOAT, numel));
else {
if (numel>2)
CU_SAFE_CALL(hipTexRefSetFormat(_tex, HIP_AD_FORMAT_SIGNED_INT32, numel));
else
CU_SAFE_CALL(hipTexRefSetFormat(_tex,HIP_AD_FORMAT_SIGNED_INT32,numel*2));
}
CU_SAFE_CALL(hipTexRefSetAddress(NULL, _tex, vec.cbegin(), vec.numel()*vec.element_size()));
#else
void* data_ptr = (void*)vec.cbegin();
CU_SAFE_CALL(hipMemcpyHtoD(hipDeviceptr_t(_device_ptr_to_global_var), &data_ptr, sizeof(void*)));
#endif
}
};
} // namespace
#endif

107
lib/gpu/geryon/hip_timer.h Normal file
View File

@ -0,0 +1,107 @@
/* -----------------------------------------------------------------------
Copyright (2010) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the Simplified BSD License.
----------------------------------------------------------------------- */
#ifndef HIP_TIMER_H
#define HIP_TIMER_H
#include <hip/hip_runtime.h>
#include "hip_macros.h"
#include "hip_device.h"
namespace ucl_hip {
/// Class for timing CUDA Driver events
class UCL_Timer {
public:
inline UCL_Timer() : _total_time(0.0f), _initialized(false) { }
inline UCL_Timer(UCL_Device &dev) : _total_time(0.0f), _initialized(false)
{ init(dev); }
inline ~UCL_Timer() { clear(); }
/// Clear any data associated with timer
/** \note init() must be called to reuse timer after a clear() **/
inline void clear() {
if (_initialized) {
CU_DESTRUCT_CALL(hipEventDestroy(start_event));
CU_DESTRUCT_CALL(hipEventDestroy(stop_event));
_initialized=false;
_total_time=0.0;
}
}
/// Initialize default command queue for timing
inline void init(UCL_Device &dev) { init(dev, dev.cq()); }
/// Initialize command queue for timing
inline void init(UCL_Device &dev, command_queue &cq) {
clear();
_cq=cq;
_initialized=true;
CU_SAFE_CALL( hipEventCreateWithFlags(&start_event,0) );
CU_SAFE_CALL( hipEventCreateWithFlags(&stop_event,0) );
}
/// Start timing on command queue
inline void start() { CU_SAFE_CALL(hipEventRecord(start_event,_cq)); }
/// Stop timing on command queue
inline void stop() { CU_SAFE_CALL(hipEventRecord(stop_event,_cq)); }
/// Block until the start event has been reached on device
inline void sync_start()
{ CU_SAFE_CALL(hipEventSynchronize(start_event)); }
/// Block until the stop event has been reached on device
inline void sync_stop()
{ CU_SAFE_CALL(hipEventSynchronize(stop_event)); }
/// Set the time elapsed to zero (not the total_time)
inline void zero() {
CU_SAFE_CALL(hipEventRecord(start_event,_cq));
CU_SAFE_CALL(hipEventRecord(stop_event,_cq));
}
/// Set the total time to zero
inline void zero_total() { _total_time=0.0; }
/// Add time from previous start and stop to total
/** Forces synchronization **/
inline double add_to_total()
{ double t=time(); _total_time+=t; return t/1000.0; }
/// Add a user specified time to the total (ms)
inline void add_time_to_total(const double t) { _total_time+=t; }
/// Return the time (ms) of last start to stop - Forces synchronization
inline double time() {
float timer;
CU_SAFE_CALL(hipEventSynchronize(stop_event));
CU_SAFE_CALL( hipEventElapsedTime(&timer,start_event,stop_event) );
return timer;
}
/// Return the time (s) of last start to stop - Forces synchronization
inline double seconds() { return time()/1000.0; }
/// Return the total time in ms
inline double total_time() { return _total_time; }
/// Return the total time in seconds
inline double total_seconds() { return _total_time/1000.0; }
private:
hipEvent_t start_event, stop_event;
hipStream_t _cq;
double _total_time;
bool _initialized;
};
} // namespace
#endif

View File

@ -36,6 +36,11 @@ using namespace ucl_cudadr;
using namespace ucl_cudart;
#endif
#ifdef UCL_HIP
#include "hip_device.h"
using namespace ucl_hip;
#endif
int main(int argc, char** argv) {
UCL_Device cop;
std::cout << "Found " << cop.num_platforms() << " platform(s).\n";

View File

@ -179,13 +179,15 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
if (_eflag) {
for (int i=0; i<_inum; i++)
evdwl+=engv[i];
if (_ef_atom)
if (_ilist==NULL)
if (_ef_atom) {
if (_ilist==NULL) {
for (int i=0; i<_inum; i++)
eatom[i]+=engv[i];
else
} else {
for (int i=0; i<_inum; i++)
eatom[_ilist[i]]+=engv[i];
}
}
vstart=_inum;
}
if (_vflag) {
@ -193,7 +195,7 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
for (int j=0; j<6; j++) {
for (int i=vstart; i<iend; i++)
virial[j]+=engv[i];
if (_vf_atom)
if (_vf_atom){
if (_ilist==NULL) {
int ii=0;
for (int i=vstart; i<iend; i++)
@ -203,6 +205,7 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
for (int i=vstart; i<iend; i++)
vatom[_ilist[ii++]][j]+=engv[i];
}
}
vstart+=_inum;
iend+=_inum;
}
@ -228,7 +231,7 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
evdwl+=engv[i];
for (int i=_inum; i<iend; i++)
ecoul+=engv[i];
if (_ef_atom)
if (_ef_atom) {
if (_ilist==NULL) {
for (int i=0; i<_inum; i++)
eatom[i]+=engv[i];
@ -240,6 +243,7 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
for (int i=_inum, ii=0; i<iend; i++)
eatom[_ilist[ii++]]+=engv[i];
}
}
vstart=iend;
iend+=_inum;
}
@ -247,7 +251,7 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
for (int j=0; j<6; j++) {
for (int i=vstart; i<iend; i++)
virial[j]+=engv[i];
if (_vf_atom)
if (_vf_atom) {
if (_ilist==NULL) {
for (int i=vstart, ii=0; i<iend; i++)
vatom[ii++][j]+=engv[i];
@ -255,6 +259,7 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
for (int i=vstart, ii=0; i<iend; i++)
vatom[_ilist[ii++]][j]+=engv[i];
}
}
vstart+=_inum;
iend+=_inum;
}

View File

@ -27,6 +27,10 @@ using namespace ucl_opencl;
#include "geryon/nvc_timer.h"
#include "geryon/nvc_mat.h"
using namespace ucl_cudart;
#elif defined(USE_HIP)
#include "geryon/hip_timer.h"
#include "geryon/hip_mat.h"
using namespace ucl_hip;
#else
#include "geryon/nvd_timer.h"
#include "geryon/nvd_mat.h"

View File

@ -15,6 +15,11 @@
#include "lal_atom.h"
#ifdef USE_HIP_DEVICE_SORT
#include <hip/hip_runtime.h>
#include <hipcub/hipcub.hpp>
#endif
namespace LAMMPS_AL {
#define AtomT Atom<numtyp,acctyp>
@ -70,6 +75,26 @@ bool AtomT::alloc(const int nall) {
}
#endif
#ifdef USE_HIP_DEVICE_SORT
if (_gpu_nbor==1) {
size_t temp_storage_bytes = 0;
if(hipSuccess != hipcub::DeviceRadixSort::SortPairs(nullptr, temp_storage_bytes, sort_out_keys, sort_out_keys, sort_out_values, sort_out_values, _max_atoms))
return false;
if(sort_out_size < _max_atoms){
if (sort_out_keys ) hipFree(sort_out_keys);
if (sort_out_values) hipFree(sort_out_values);
hipMalloc(&sort_out_keys , _max_atoms * sizeof(unsigned));
hipMalloc(&sort_out_values, _max_atoms * sizeof(int ));
sort_out_size = _max_atoms;
}
if(temp_storage_bytes > sort_temp_storage_size){
if(sort_temp_storage) hipFree(sort_temp_storage);
hipMalloc(&sort_temp_storage, temp_storage_bytes);
sort_temp_storage_size = temp_storage_bytes;
}
}
#endif
// --------------------------- Device allocations
int gpu_bytes=0;
success=success && (x.alloc(_max_atoms*4,*dev,UCL_WRITE_ONLY,
@ -184,6 +209,27 @@ bool AtomT::add_fields(const bool charge, const bool rot,
return false;
}
#endif
#ifdef USE_HIP_DEVICE_SORT
if (_gpu_nbor==1) {
size_t temp_storage_bytes = 0;
if(hipSuccess != hipcub::DeviceRadixSort::SortPairs(nullptr, temp_storage_bytes, sort_out_keys, sort_out_keys, sort_out_values, sort_out_values, _max_atoms))
return false;
if(sort_out_size < _max_atoms){
if (sort_out_keys ) hipFree(sort_out_keys);
if (sort_out_values) hipFree(sort_out_values);
hipMalloc(&sort_out_keys , _max_atoms * sizeof(unsigned));
hipMalloc(&sort_out_values, _max_atoms * sizeof(int ));
sort_out_size = _max_atoms;
}
if(temp_storage_bytes > sort_temp_storage_size){
if(sort_temp_storage) hipFree(sort_temp_storage);
hipMalloc(&sort_temp_storage, temp_storage_bytes);
sort_temp_storage_size = temp_storage_bytes;
}
}
#endif
success=success && (dev_particle_id.alloc(_max_atoms,*dev,
UCL_READ_ONLY)==UCL_SUCCESS);
gpu_bytes+=dev_particle_id.row_bytes();
@ -275,6 +321,19 @@ void AtomT::clear_resize() {
if (_gpu_nbor==1) cudppDestroyPlan(sort_plan);
#endif
#ifdef USE_HIP_DEVICE_SORT
if (_gpu_nbor==1) {
if(sort_out_keys) hipFree(sort_out_keys);
if(sort_out_values) hipFree(sort_out_values);
if(sort_temp_storage) hipFree(sort_temp_storage);
sort_out_keys = nullptr;
sort_out_values = nullptr;
sort_temp_storage = nullptr;
sort_temp_storage_size = 0;
sort_out_size = 0;
}
#endif
if (_gpu_nbor==2) {
host_particle_id.clear();
host_cell_id.clear();
@ -326,6 +385,22 @@ void AtomT::sort_neighbor(const int num_atoms) {
UCL_GERYON_EXIT;
}
#endif
#ifdef USE_HIP_DEVICE_SORT
if(sort_out_size < num_atoms){
printf("AtomT::sort_neighbor: invalid temp buffer size\n");
UCL_GERYON_EXIT;
}
if(hipSuccess != hipcub::DeviceRadixSort::SortPairs(sort_temp_storage, sort_temp_storage_size, (unsigned *)dev_cell_id.begin(), sort_out_keys, (int *)dev_particle_id.begin(), sort_out_values, num_atoms)){
printf("AtomT::sort_neighbor: DeviceRadixSort error\n");
UCL_GERYON_EXIT;
}
if(hipSuccess != hipMemcpy((unsigned *)dev_cell_id.begin(), sort_out_keys , num_atoms*sizeof(unsigned), hipMemcpyDeviceToDevice) ||
hipSuccess != hipMemcpy((int *) dev_particle_id.begin(), sort_out_values, num_atoms*sizeof(int ), hipMemcpyDeviceToDevice)){
printf("AtomT::sort_neighbor: copy output error\n");
UCL_GERYON_EXIT;
}
#endif
}
#ifdef GPU_CAST

View File

@ -13,7 +13,7 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_preprocessor.h"
#endif

View File

@ -29,6 +29,11 @@ using namespace ucl_opencl;
#include "geryon/nvc_mat.h"
#include "geryon/nvc_kernel.h"
using namespace ucl_cudart;
#elif defined(USE_HIP)
#include "geryon/hip_timer.h"
#include "geryon/hip_mat.h"
#include "geryon/hip_kernel.h"
using namespace ucl_hip;
#else
#include "geryon/nvd_timer.h"
#include "geryon/nvd_mat.h"
@ -477,6 +482,14 @@ class Atom {
CUDPPConfiguration sort_config;
CUDPPHandle sort_plan;
#endif
#ifdef USE_HIP_DEVICE_SORT
unsigned* sort_out_keys = nullptr;
int* sort_out_values = nullptr;
void* sort_temp_storage = nullptr;
size_t sort_temp_storage_size = 0;
size_t sort_out_size = 0;
#endif
};
}

View File

@ -13,7 +13,7 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_preprocessor.h"
#endif

View File

@ -24,6 +24,8 @@
#include "geryon/ocl_texture.h"
#elif defined(USE_CUDART)
#include "geryon/nvc_texture.h"
#elif defined(USE_HIP)
#include "geryon/hip_texture.h"
#else
#include "geryon/nvd_texture.h"
#endif

View File

@ -25,6 +25,8 @@
#include "geryon/ocl_texture.h"
#elif defined(USE_CUDART)
#include "geryon/nvc_texture.h"
#elif defined(USE_HIP)
#include "geryon/hip_texture.h"
#else
#include "geryon/nvd_texture.h"
#endif

View File

@ -23,6 +23,8 @@
#ifdef USE_OPENCL
#include "geryon/ocl_texture.h"
#elif defined(USE_HIP)
#include "geryon/hip_texture.h"
#else
#include "geryon/nvd_texture.h"
#endif

View File

@ -23,6 +23,8 @@
#ifdef USE_OPENCL
#include "geryon/ocl_texture.h"
#elif defined(USE_HIP)
#include "geryon/hip_texture.h"
#else
#include "geryon/nvd_texture.h"
#endif

View File

@ -24,6 +24,8 @@
#include "geryon/ocl_texture.h"
#elif defined(USE_CUDART)
#include "geryon/nvc_texture.h"
#elif defined(USE_HIP)
#include "geryon/hip_texture.h"
#else
#include "geryon/nvd_texture.h"
#endif

View File

@ -24,6 +24,8 @@
#include "geryon/ocl_texture.h"
#elif defined(USE_CUDART)
#include "geryon/nvc_texture.h"
#elif defined(USE_HIP)
#include "geryon/hip_texture.h"
#else
#include "geryon/nvd_texture.h"
#endif

View File

@ -13,12 +13,12 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
_texture( pos_tex,float4);
#else
texture<int4,1> pos_tex;
_texture_2d( pos_tex,int4);
#endif
#else
#define pos_tex x_

View File

@ -13,12 +13,12 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
_texture( pos_tex,float4);
#else
texture<int4,1> pos_tex;
_texture_2d( pos_tex,int4);
#endif
#else
#define pos_tex x_

View File

@ -13,15 +13,15 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
#endif
#else

View File

@ -13,15 +13,16 @@
// email : ndactrung@gmail.com
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
#endif
#else

View File

@ -13,15 +13,15 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
#endif
#else

View File

@ -13,15 +13,15 @@
// email : ndactrung@gmail.com
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
#endif
#else

View File

@ -13,12 +13,12 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
_texture( pos_tex,float4);
#else
texture<int4,1> pos_tex;
_texture_2d( pos_tex,int4);
#endif
#else
#define pos_tex x_

View File

@ -13,15 +13,15 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
#endif
#else

View File

@ -13,15 +13,15 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
#endif
#else

View File

@ -13,15 +13,15 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
#endif
#else

View File

@ -13,12 +13,12 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
_texture( pos_tex,float4);
#else
texture<int4,1> pos_tex;
_texture_2d( pos_tex,int4);
#endif
#else
#define pos_tex x_

View File

@ -13,15 +13,15 @@
// email : ndtrung@umich.edu
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
#endif
#else

View File

@ -13,15 +13,15 @@
// email : ndtrung@umich.edu
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
#endif
#else

View File

@ -13,15 +13,15 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
#endif
#else

View File

@ -13,15 +13,15 @@
// email : a.kohlmeyer@temple.edu
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
#endif
#else

View File

@ -13,15 +13,15 @@
// email : ndactrung@gmail.com
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
#endif
#else

View File

@ -268,7 +268,7 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
gpu_nbor=1;
else if (_gpu_mode==Device<numtyp,acctyp>::GPU_HYB_NEIGH)
gpu_nbor=2;
#ifndef USE_CUDPP
#if !defined(USE_CUDPP) && !defined(USE_HIP_DEVICE_SORT)
if (gpu_nbor==1)
gpu_nbor=2;
#endif
@ -341,7 +341,7 @@ int DeviceT::init_nbor(Neighbor *nbor, const int nlocal,
gpu_nbor=1;
else if (_gpu_mode==Device<numtyp,acctyp>::GPU_HYB_NEIGH)
gpu_nbor=2;
#ifndef USE_CUDPP
#if !defined(USE_CUDPP) && !defined(USE_HIP_DEVICE_SORT)
if (gpu_nbor==1)
gpu_nbor=2;
#endif
@ -712,7 +712,7 @@ int DeviceT::compile_kernels() {
gpu_lib_data.update_host(false);
_ptx_arch=static_cast<double>(gpu_lib_data[0])/100.0;
#ifndef USE_OPENCL
#if !(defined(USE_OPENCL) || defined(USE_HIP))
if (_ptx_arch>gpu->arch() || floor(_ptx_arch)<floor(gpu->arch()))
return -4;
#endif

View File

@ -13,7 +13,7 @@
// email : brownw@ornl.gov
// ***************************************************************************
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_preprocessor.h"
#endif

View File

@ -13,16 +13,16 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
texture<float4> mu_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
_texture( mu_tex,float4);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
texture<int4,1> mu_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
_texture_2d( mu_tex,int4);
#endif
#else

View File

@ -13,17 +13,17 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
texture<float4> mu_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
_texture( mu_tex,float4);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
texture<int4,1> mu_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
_texture_2d( mu_tex,int4);
#endif
#else

View File

@ -13,16 +13,16 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
texture<float4> mu_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
_texture( mu_tex,float4);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
texture<int4,1> mu_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
_texture_2d( mu_tex,int4);
#endif
#else

View File

@ -13,14 +13,14 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float4> vel_tex;
_texture( pos_tex,float4);
_texture( vel_tex,float4);
#else
texture<int4,1> pos_tex;
texture<int4,1> vel_tex;
_texture_2d( pos_tex,int4);
_texture_2d( vel_tex,int4);
#endif
#else
#define pos_tex x_

View File

@ -13,27 +13,27 @@
// email : brownw@ornl.gov nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> fp_tex;
texture<float4> rhor_sp1_tex;
texture<float4> rhor_sp2_tex;
texture<float4> frho_sp1_tex;
texture<float4> frho_sp2_tex;
texture<float4> z2r_sp1_tex;
texture<float4> z2r_sp2_tex;
_texture( pos_tex,float4);
_texture( fp_tex,float);
_texture( rhor_sp1_tex,float4);
_texture( rhor_sp2_tex,float4);
_texture( frho_sp1_tex,float4);
_texture( frho_sp2_tex,float4);
_texture( z2r_sp1_tex,float4);
_texture( z2r_sp2_tex,float4);
#else
texture<int4> pos_tex;
texture<int2> fp_tex;
texture<int4> rhor_sp1_tex;
texture<int4> rhor_sp2_tex;
texture<int4> frho_sp1_tex;
texture<int4> frho_sp2_tex;
texture<int4> z2r_sp1_tex;
texture<int4> z2r_sp2_tex;
_texture( pos_tex,int4);
_texture( fp_tex,int2);
_texture( rhor_sp1_tex,int4);
_texture( rhor_sp2_tex,int4);
_texture( frho_sp1_tex,int4);
_texture( frho_sp2_tex,int4);
_texture( z2r_sp1_tex,int4);
_texture( z2r_sp2_tex,int4);
#endif
#else

View File

@ -18,12 +18,14 @@
enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE};
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex, quat_tex;
_texture( pos_tex, float4);
_texture( quat_tex,float4);
#else
texture<int4,1> pos_tex, quat_tex;
_texture_2d( pos_tex,int4);
_texture_2d( quat_tex,int4);
#endif
#else
#define pos_tex x_

View File

@ -13,12 +13,12 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_preprocessor.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
_texture( pos_tex,float4);
#else
texture<int4,1> pos_tex;
_texture_2d( pos_tex,int4);
#endif
#else
#define pos_tex x_

View File

@ -13,12 +13,12 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
_texture( pos_tex,float4);
#else
texture<int4,1> pos_tex;
_texture_2d( pos_tex,int4);
#endif
#else
#define pos_tex x_

View File

@ -13,7 +13,7 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_ellipsoid_extra.h"
#endif

View File

@ -13,7 +13,7 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_ellipsoid_extra.h"
#endif

View File

@ -13,12 +13,12 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
_texture( pos_tex,float4);
#else
texture<int4,1> pos_tex;
_texture_2d( pos_tex,int4);
#endif
#else
#define pos_tex x_

View File

@ -13,12 +13,12 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
_texture( pos_tex,float4);
#else
texture<int4,1> pos_tex;
_texture_2d( pos_tex,int4);
#endif
#else
#define pos_tex x_

View File

@ -13,15 +13,15 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
#endif
#else

View File

@ -13,15 +13,15 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
#endif
#else

View File

@ -13,15 +13,15 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
#endif
#else

View File

@ -13,15 +13,15 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
#endif
#else

View File

@ -13,19 +13,19 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
texture<float> gcons_tex;
texture<float> dgcons_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
_texture( gcons_tex,float);
_texture( dgcons_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
texture<int2> gcons_tex;
texture<int2> dgcons_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
_texture( gcons_tex,int2);
_texture( dgcons_tex,int2);
#endif
#else

View File

@ -13,12 +13,12 @@
// email : ndactrung@gmail.com
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
_texture( pos_tex,float4);
#else
texture<int4,1> pos_tex;
_texture_2d( pos_tex,int4);
#endif
#else
#define pos_tex x_

View File

@ -13,15 +13,15 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
#endif
#else

View File

@ -13,13 +13,13 @@
// email : ibains@nvidia.com
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
_texture( pos_tex,float4);
#else
texture<int4,1> pos_tex;
_texture_2d( pos_tex,int4);
#endif
#else

View File

@ -13,15 +13,15 @@
// email : ndactrung@gmail.com
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
#endif
#else

View File

@ -13,13 +13,13 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
_texture( pos_tex,float4);
#else
texture<int4,1> pos_tex;
_texture_2d( pos_tex,int4);
#endif
#else

View File

@ -13,12 +13,12 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
_texture( pos_tex,float4);
#else
texture<int4,1> pos_tex;
_texture_2d( pos_tex,int4);
#endif
#else
#define pos_tex x_

View File

@ -13,15 +13,15 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
#endif
#else

View File

@ -23,7 +23,7 @@ const char *lj_tip4p=0;
#include "lal_lj_tip4p_long.h"
#include <cassert>
using namespace LAMMPS_AL;
namespace LAMMPS_AL {
#define LJTIP4PLongT LJ_TIP4PLong<numtyp, acctyp>
extern Device<PRECISION,ACC_PRECISION> device;
@ -370,6 +370,5 @@ int** LJTIP4PLongT::compute(const int ago, const int inum_full,
}
template class LJ_TIP4PLong<PRECISION,ACC_PRECISION>;
}

View File

@ -13,7 +13,7 @@
// email : thevsevak@gmail.com
// ***************************************************************************
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifdef LAMMPS_SMALLBIG
@ -27,11 +27,11 @@
#define tagint int
#endif
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
#endif
#else

View File

@ -13,12 +13,12 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
_texture( pos_tex,float4);
#else
texture<int4,1> pos_tex;
_texture_2d( pos_tex,int4);
#endif
#else
#define pos_tex x_

View File

@ -13,13 +13,13 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
_texture( pos_tex,float4);
#else
texture<int4,1> pos_tex;
_texture_2d( pos_tex,int4);
#endif
#else

View File

@ -13,7 +13,7 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_preprocessor.h"
#endif

View File

@ -14,7 +14,7 @@
// email : penwang@nvidia.com, brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_preprocessor.h"
#ifdef LAMMPS_SMALLBIG
#define tagint int
@ -27,9 +27,9 @@
#define tagint int
#endif
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
_texture( pos_tex,float4);
#else
texture<int4,1> pos_tex;
_texture_2d( pos_tex,int4);
#endif
__kernel void calc_cell_id(const numtyp4 *restrict pos,

View File

@ -24,6 +24,10 @@ using namespace ucl_opencl;
#include "geryon/nvc_kernel.h"
#include "geryon/nvc_texture.h"
using namespace ucl_cudart;
#elif defined(USE_HIP)
#include "geryon/hip_kernel.h"
#include "geryon/hip_texture.h"
using namespace ucl_hip;
#else
#include "geryon/nvd_kernel.h"
#include "geryon/nvd_texture.h"

View File

@ -13,15 +13,15 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_preprocessor.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> q_tex;
_texture( pos_tex,float4);
_texture( q_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> q_tex;
_texture_2d( pos_tex,int4);
_texture( q_tex,int2);
#endif
// Allow PPPM to compile without atomics for NVIDIA 1.0 cards, error

View File

@ -23,6 +23,8 @@
#include "geryon/ocl_texture.h"
#elif defined(USE_CUDART)
#include "geryon/nvc_texture.h"
#elif defined(USE_HIP)
#include "geryon/hip_texture.h"
#else
#include "geryon/nvd_texture.h"
#endif

View File

@ -24,9 +24,11 @@ struct _lgpu_int2 {
int x; int y;
};
#ifndef USE_HIP
#ifndef int2
#define int2 _lgpu_int2
#endif
#endif
struct _lgpu_float2 {
float x; float y;

View File

@ -1,4 +1,4 @@
// **************************************************************************
// **************************************************************************
// preprocessor.cu
// -------------------
// W. Michael Brown (ORNL)
@ -60,6 +60,150 @@
//
//*************************************************************************/
#define _texture(name, type) texture<type> name
#define _texture_2d(name, type) texture<type,1> name
// -------------------------------------------------------------------------
// HIP DEFINITIONS
// -------------------------------------------------------------------------
#ifdef USE_HIP
#include <hip/hip_runtime.h>
#ifdef __HIP_PLATFORM_HCC__
#define mul24(x, y) __mul24(x, y)
#undef _texture
#undef _texture_2d
#define _texture(name, type) __device__ type* name
#define _texture_2d(name, type) __device__ type* name
#endif
#define GLOBAL_ID_X threadIdx.x+mul24(blockIdx.x,blockDim.x)
#define GLOBAL_ID_Y threadIdx.y+mul24(blockIdx.y,blockDim.y)
#define GLOBAL_SIZE_X mul24(gridDim.x,blockDim.x);
#define GLOBAL_SIZE_Y mul24(gridDim.y,blockDim.y);
#define THREAD_ID_X threadIdx.x
#define THREAD_ID_Y threadIdx.y
#define BLOCK_ID_X blockIdx.x
#define BLOCK_ID_Y blockIdx.y
#define BLOCK_SIZE_X blockDim.x
#define BLOCK_SIZE_Y blockDim.y
#define __kernel extern "C" __global__
#ifdef __local
#undef __local
#endif
#define __local __shared__
#define __global
#define restrict __restrict__
#define atom_add atomicAdd
#define ucl_inline static __inline__ __device__
#define THREADS_PER_ATOM 4
#define THREADS_PER_CHARGE 8
#define BLOCK_NBOR_BUILD 128
#define BLOCK_PAIR 256
#define BLOCK_BIO_PAIR 256
#define BLOCK_ELLIPSE 128
#define MAX_SHARED_TYPES 11
#ifdef _SINGLE_SINGLE
ucl_inline double shfl_xor(double var, int laneMask, int width) {
#ifdef __HIP_PLATFORM_HCC__
return __shfl_xor(var, laneMask, width);
#else
return __shfl_xor_sync(0xffffffff, var, laneMask, width);
#endif
}
#else
ucl_inline double shfl_xor(double var, int laneMask, int width) {
int2 tmp;
tmp.x = __double2hiint(var);
tmp.y = __double2loint(var);
#ifdef __HIP_PLATFORM_HCC__
tmp.x = __shfl_xor(tmp.x,laneMask,width);
tmp.y = __shfl_xor(tmp.y,laneMask,width);
#else
tmp.x = __shfl_xor_sync(0xffffffff, tmp.x,laneMask,width);
tmp.y = __shfl_xor_sync(0xffffffff, tmp.y,laneMask,width);
#endif
return __hiloint2double(tmp.x,tmp.y);
}
#endif
#ifdef __HIP_PLATFORM_HCC__
#define ARCH 600
#define WARP_SIZE 64
#endif
#ifdef __HIP_PLATFORM_NVCC__
#define ARCH __CUDA_ARCH__
#define WARP_SIZE 32
#endif
#define fast_mul(X,Y) (X)*(Y)
#define MEM_THREADS WARP_SIZE
#define PPPM_BLOCK_1D 64
#define BLOCK_CELL_2D 8
#define BLOCK_CELL_ID 128
#define MAX_BIO_SHARED_TYPES 128
#ifdef __HIP_PLATFORM_NVCC__
#ifdef _DOUBLE_DOUBLE
#define fetch4(ans,i,pos_tex) { \
int4 xy = tex1Dfetch(pos_tex,i*2); \
int4 zt = tex1Dfetch(pos_tex,i*2+1); \
ans.x=__hiloint2double(xy.y, xy.x); \
ans.y=__hiloint2double(xy.w, xy.z); \
ans.z=__hiloint2double(zt.y, zt.x); \
ans.w=__hiloint2double(zt.w, zt.z); \
}
#define fetch(ans,i,q_tex) { \
int2 qt = tex1Dfetch(q_tex,i); \
ans=__hiloint2double(qt.y, qt.x); \
}
#else
#define fetch4(ans,i,pos_tex) ans=tex1Dfetch(pos_tex, i);
#define fetch(ans,i,q_tex) ans=tex1Dfetch(q_tex,i);
#endif
#else
#ifdef _DOUBLE_DOUBLE
#define fetch4(ans,i,pos_tex) (ans=*(((double4*)pos_tex) + i))
#define fetch(ans,i,q_tex) (ans=*(((double *) q_tex) + i))
#else
#define fetch4(ans,i,pos_tex) (ans=*(((float4*)pos_tex) + i))
#define fetch(ans,i,q_tex) (ans=*(((float *) q_tex) + i))
#endif
#endif
#ifdef _DOUBLE_DOUBLE
#define ucl_exp exp
#define ucl_powr pow
#define ucl_atan atan
#define ucl_cbrt cbrt
#define ucl_ceil ceil
#define ucl_abs fabs
#define ucl_rsqrt rsqrt
#define ucl_sqrt sqrt
#define ucl_recip(x) ((numtyp)1.0/(x))
#else
#define ucl_atan atanf
#define ucl_cbrt cbrtf
#define ucl_ceil ceilf
#define ucl_abs fabsf
#define ucl_recip(x) ((numtyp)1.0/(x))
#define ucl_rsqrt rsqrtf
#define ucl_sqrt sqrtf
#ifdef NO_HARDWARE_TRANSCENDENTALS
#define ucl_exp expf
#define ucl_powr powf
#else
#define ucl_exp __expf
#define ucl_powr __powf
#endif
#endif
#endif
// -------------------------------------------------------------------------
// CUDA DEFINITIONS
// -------------------------------------------------------------------------

View File

@ -13,7 +13,7 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_ellipsoid_extra.h"
#endif

View File

@ -13,7 +13,7 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_ellipsoid_extra.h"
#endif

View File

@ -13,12 +13,12 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
_texture( pos_tex,float4);
#else
texture<int4,1> pos_tex;
_texture_2d( pos_tex,int4);
#endif
#else
#define pos_tex x_

View File

@ -13,19 +13,19 @@
// email : brownw@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float4> sw1_tex;
texture<float4> sw2_tex;
texture<float4> sw3_tex;
_texture( pos_tex,float4);
_texture( sw1_tex,float4);
_texture( sw2_tex,float4);
_texture( sw3_tex,float4);
#else
texture<int4,1> pos_tex;
texture<int4> sw1_tex;
texture<int4> sw2_tex;
texture<int4> sw3_tex;
_texture_2d( pos_tex,int4);
_texture( sw1_tex,int4);
_texture( sw2_tex,int4);
_texture( sw3_tex,int4);
#endif
#else

View File

@ -13,12 +13,12 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
_texture( pos_tex,float4);
#else
texture<int4,1> pos_tex;
_texture_2d( pos_tex,int4);
#endif
#else
#define pos_tex x_

View File

@ -13,23 +13,23 @@
// email : ndactrung@gmail.com
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_tersoff_extra.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float4> ts1_tex;
texture<float4> ts2_tex;
texture<float4> ts3_tex;
texture<float4> ts4_tex;
texture<float4> ts5_tex;
_texture( pos_tex,float4);
_texture( ts1_tex,float4);
_texture( ts2_tex,float4);
_texture( ts3_tex,float4);
_texture( ts4_tex,float4);
_texture( ts5_tex,float4);
#else
texture<int4,1> pos_tex;
texture<int4> ts1_tex;
texture<int4> ts2_tex;
texture<int4> ts3_tex;
texture<int4> ts4_tex;
texture<int4> ts5_tex;
_texture_2d( pos_tex,int4);
_texture( ts1_tex,int4);
_texture( ts2_tex,int4);
_texture( ts3_tex,int4);
_texture( ts4_tex,int4);
_texture( ts5_tex,int4);
#endif
#else

View File

@ -16,7 +16,7 @@
#ifndef LAL_TERSOFF_EXTRA_H
#define LAL_TERSOFF_EXTRA_H
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#else
#endif

View File

@ -13,23 +13,23 @@
// email : ndactrung@gmail.com
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_tersoff_mod_extra.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float4> ts1_tex;
texture<float4> ts2_tex;
texture<float4> ts3_tex;
texture<float4> ts4_tex;
texture<float4> ts5_tex;
_texture( pos_tex,float4);
_texture( ts1_tex,float4);
_texture( ts2_tex,float4);
_texture( ts3_tex,float4);
_texture( ts4_tex,float4);
_texture( ts5_tex,float4);
#else
texture<int4,1> pos_tex;
texture<int4> ts1_tex;
texture<int4> ts2_tex;
texture<int4> ts3_tex;
texture<int4> ts4_tex;
texture<int4> ts5_tex;
_texture_2d( pos_tex,int4);
_texture( ts1_tex,int4);
_texture( ts2_tex,int4);
_texture( ts3_tex,int4);
_texture( ts4_tex,int4);
_texture( ts5_tex,int4);
#endif
#else

View File

@ -16,7 +16,7 @@
#ifndef LAL_TERSOFF_MOD_EXTRA_H
#define LAL_TERSOFF_MOD_EXTRA_H
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#else
#endif

View File

@ -13,25 +13,25 @@
// email : ndactrung@gmail.com
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_tersoff_zbl_extra.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float4> ts1_tex;
texture<float4> ts2_tex;
texture<float4> ts3_tex;
texture<float4> ts4_tex;
texture<float4> ts5_tex;
texture<float4> ts6_tex;
_texture( pos_tex,float4);
_texture( ts1_tex,float4);
_texture( ts2_tex,float4);
_texture( ts3_tex,float4);
_texture( ts4_tex,float4);
_texture( ts5_tex,float4);
_texture( ts6_tex,float4);
#else
texture<int4,1> pos_tex;
texture<int4> ts1_tex;
texture<int4> ts2_tex;
texture<int4> ts3_tex;
texture<int4> ts4_tex;
texture<int4> ts5_tex;
texture<int4> ts6_tex;
_texture_2d( pos_tex,int4);
_texture( ts1_tex,int4);
_texture( ts2_tex,int4);
_texture( ts3_tex,int4);
_texture( ts4_tex,int4);
_texture( ts5_tex,int4);
_texture( ts6_tex,int4);
#endif
#else

View File

@ -16,7 +16,7 @@
#ifndef LAL_TERSOFF_ZBL_EXTRA_H
#define LAL_TERSOFF_ZBL_EXTRA_H
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#else
#endif

View File

@ -15,12 +15,12 @@
dekoning@ifi.unicamp.br
***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
_texture( pos_tex,float4);
#else
texture<int4,1> pos_tex;
_texture_2d( pos_tex,int4);
#endif
#else
#define pos_tex x_

View File

@ -13,23 +13,23 @@
// email : andershaf@gmail.com
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float4> param1_tex;
texture<float4> param2_tex;
texture<float4> param3_tex;
texture<float4> param4_tex;
texture<float4> param5_tex;
_texture( pos_tex,float4);
_texture( param1_tex,float4);
_texture( param2_tex,float4);
_texture( param3_tex,float4);
_texture( param4_tex,float4);
_texture( param5_tex,float4);
#else
texture<int4,1> pos_tex;
texture<int4> param1_tex;
texture<int4> param2_tex;
texture<int4> param3_tex;
texture<int4> param4_tex;
texture<int4> param5_tex;
_texture_2d( pos_tex,int4);
_texture( param1_tex,int4);
_texture( param2_tex,int4);
_texture( param3_tex,int4);
_texture( param4_tex,int4);
_texture( param5_tex,int4);
#endif
#else

View File

@ -13,12 +13,12 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
_texture( pos_tex,float4);
#else
texture<int4,1> pos_tex;
_texture_2d( pos_tex,int4);
#endif
#else
#define pos_tex x_

View File

@ -13,15 +13,15 @@
// email : nguyentd@ornl.gov
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
texture<float> rad_tex;
_texture( pos_tex,float4);
_texture( rad_tex,float);
#else
texture<int4,1> pos_tex;
texture<int2> rad_tex;
_texture_2d( pos_tex,int4);
_texture( rad_tex,int2);
#endif
#else

View File

@ -13,12 +13,12 @@
// email : ndactrung@gmail.com
// ***************************************************************************/
#ifdef NV_KERNEL
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
#ifndef _DOUBLE_DOUBLE
texture<float4> pos_tex;
_texture( pos_tex,float4);
#else
texture<int4,1> pos_tex;
_texture_2d( pos_tex,int4);
#endif
#else
#define pos_tex x_

View File

@ -0,0 +1,120 @@
# hip = MPI with HIP(clang)
SHELL = /bin/sh
# ---------------------------------------------------------------------
# compiler/linker settings
# specify flags and libraries needed for your compiler
CC = mpicxx
CCFLAGS = -g -O3
SHFLAGS = -fPIC
DEPFLAGS = -M
HIP_PATH ?= $(wildcard /opt/rocm/hip)
LINK = $(HIP_PATH)/bin/hipcc
LINKFLAGS = -g -O3 $(shell mpicxx --showme:link)
LIB =
SIZE = size
ARCHIVE = ar
ARFLAGS = -rc
SHLIBFLAGS = -shared
# ---------------------------------------------------------------------
# LAMMPS-specific settings, all OPTIONAL
# specify settings for LAMMPS features you will use
# if you change any -D setting, do full re-compile after "make clean"
# LAMMPS ifdef settings
# see possible settings in Section 2.2 (step 4) of manual
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_MEMALIGN=64
# MPI library
# see discussion in Section 2.2 (step 5) of manual
# MPI wrapper compiler/linker can provide this info
# can point to dummy MPI library in src/STUBS as in Makefile.serial
# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
# INC = path for mpi.h, MPI compiler settings
# PATH = path for MPI library
# LIB = name of MPI library
MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1
MPI_PATH =
MPI_LIB =
# FFT library
# see discussion in Section 2.2 (step 6) of manual
# can be left blank to use provided KISS FFT library
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
# PATH = path for FFT library
# LIB = name of FFT library
FFT_INC =
FFT_PATH =
FFT_LIB =
# JPEG and/or PNG library
# see discussion in Section 2.2 (step 7) of manual
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
# INC = path(s) for jpeglib.h and/or png.h
# PATH = path(s) for JPEG library and/or PNG library
# LIB = name(s) of JPEG library and/or PNG library
JPG_INC =
JPG_PATH =
JPG_LIB =
# ---------------------------------------------------------------------
# build rules and dependencies
# do not edit this section
include Makefile.package.settings
include Makefile.package
ifeq (nvcc,${HIP_PLATFORM})
# fix nvcc can't handle -pthread flag
LINKFLAGS := $(subst -pthread,-Xcompiler -pthread,$(LINKFLAGS))
endif
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS)
EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS)
# Path to src files
vpath %.cpp ..
vpath %.h ..
# Link target
$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS)
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
$(SIZE) $(EXE)
# Library targets
lib: $(OBJ) $(EXTRA_LINK_DEPENDS)
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
shlib: $(OBJ) $(EXTRA_LINK_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
$(OBJ) $(EXTRA_LIB) $(LIB)
# Compilation rules
%.o:%.cpp
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
# Individual dependencies
depend : fastdep.exe $(SRC)
@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
fastdep.exe: ../DEPEND/fastdep.c
cc -O -o $@ $<
sinclude .depend