diff --git a/cmake/Modules/Packages/GPU.cmake b/cmake/Modules/Packages/GPU.cmake index 75569aa55d..243b5111de 100644 --- a/cmake/Modules/Packages/GPU.cmake +++ b/cmake/Modules/Packages/GPU.cmake @@ -233,7 +233,8 @@ elseif(GPU_API STREQUAL "OPENCL") elseif(GPU_API STREQUAL "HIP") if(NOT DEFINED HIP_PATH) if(NOT DEFINED ENV{HIP_PATH}) - set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to HIP installation") + message(FATAL_ERROR "GPU_API=HIP requires HIP_PATH to be defined.\n" + "Either pass the HIP_PATH as a CMake option via -DHIP_PATH=... or set the HIP_PATH environment variable.") else() set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to HIP installation") endif() @@ -261,6 +262,8 @@ elseif(GPU_API STREQUAL "HIP") if(HIP_PLATFORM STREQUAL "hcc" OR HIP_PLATFORM STREQUAL "amd") set(HIP_ARCH "gfx906" CACHE STRING "HIP target architecture") + elseif(HIP_PLATFORM STREQUAL "spirv") + set(HIP_ARCH "spirv" CACHE STRING "HIP target architecture") elseif(HIP_PLATFORM STREQUAL "nvcc") find_package(CUDA REQUIRED) set(HIP_ARCH "sm_50" CACHE STRING "HIP primary CUDA architecture (e.g. sm_60)") @@ -340,7 +343,14 @@ elseif(GPU_API STREQUAL "HIP") VERBATIM COMMAND ${HIP_HIPCC_EXECUTABLE} --fatbin --use_fast_math -DUSE_HIP -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES} ${HIP_CUDA_GENCODE} -I${LAMMPS_LIB_SOURCE_DIR}/gpu -o ${CUBIN_FILE} ${CU_FILE} DEPENDS ${CU_FILE} COMMENT "Generating ${CU_NAME}.cubin") - endif() + elseif(HIP_PLATFORM STREQUAL "spirv") + configure_file(${CU_FILE} ${CU_CPP_FILE} COPYONLY) + + add_custom_command(OUTPUT ${CUBIN_FILE} + VERBATIM COMMAND ${HIP_HIPCC_EXECUTABLE} -c -O3 -DUSE_HIP -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES} -I${LAMMPS_LIB_SOURCE_DIR}/gpu -o ${CUBIN_FILE} ${CU_CPP_FILE} + DEPENDS ${CU_CPP_FILE} + COMMENT "Gerating ${CU_NAME}.cubin") + endif() add_custom_command(OUTPUT ${CUBIN_H_FILE} COMMAND ${CMAKE_COMMAND} -D SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR} -D VARNAME=${CU_NAME} -D HEADER_FILE=${CUBIN_H_FILE} -D SOURCE_FILE=${CUBIN_FILE} -P ${CMAKE_CURRENT_SOURCE_DIR}/Modules/GenerateBinaryHeader.cmake diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst index 3dad393a52..24704d8672 100644 --- a/doc/src/Build_extras.rst +++ b/doc/src/Build_extras.rst @@ -123,6 +123,7 @@ CMake build -D GPU_API=value # value = opencl (default) or cuda or hip -D GPU_PREC=value # precision setting # value = double or mixed (default) or single + -D HIP_PATH # path to HIP installation. Must be set if GPU_API=HIP -D GPU_ARCH=value # primary GPU hardware choice for GPU_API=cuda # value = sm_XX, see below # default is sm_50 @@ -179,10 +180,17 @@ set appropriate environment variables. Some variables such as :code:`HCC_AMDGPU_TARGET` (for ROCm <= 4.0) or :code:`CUDA_PATH` are necessary for :code:`hipcc` and the linker to work correctly. +Using CHIP-SPV implementation of HIP is now supported. It allows one to run HIP +code on Intel GPUs via the OpenCL or Level Zero backends. To use CHIP-SPV, you must +set :code:`-DHIP_USE_DEVICE_SORT=OFF` in your CMake command line as CHIP-SPV does not +yet support hipCUB. The use of HIP for Intel GPUs is still experimental so you +should only use this option in preparations to run on Aurora system at ANL. + .. code:: bash # AMDGPU target (ROCm <= 4.0) export HIP_PLATFORM=hcc + export HIP_PATH=/path/to/HIP/install export HCC_AMDGPU_TARGET=gfx906 cmake -D PKG_GPU=on -D GPU_API=HIP -D HIP_ARCH=gfx906 -D CMAKE_CXX_COMPILER=hipcc .. make -j 4 @@ -191,6 +199,7 @@ and the linker to work correctly. # AMDGPU target (ROCm >= 4.1) export HIP_PLATFORM=amd + export HIP_PATH=/path/to/HIP/install cmake -D PKG_GPU=on -D GPU_API=HIP -D HIP_ARCH=gfx906 -D CMAKE_CXX_COMPILER=hipcc .. make -j 4 @@ -199,10 +208,20 @@ and the linker to work correctly. # CUDA target (not recommended, use GPU_ARCH=cuda) # !!! DO NOT set CMAKE_CXX_COMPILER !!! export HIP_PLATFORM=nvcc + export HIP_PATH=/path/to/HIP/install export CUDA_PATH=/usr/local/cuda cmake -D PKG_GPU=on -D GPU_API=HIP -D HIP_ARCH=sm_70 .. make -j 4 +.. code:: bash + + # SPIR-V target (Intel GPUs) + export HIP_PLATFORM=spirv + export HIP_PATH=/path/to/HIP/install + export CMAKE_CXX_COMPILER= + cmake -D PKG_GPU=on -D GPU_API=HIP .. + make -j 4 + Traditional make ^^^^^^^^^^^^^^^^ diff --git a/lib/gpu/Makefile.hip b/lib/gpu/Makefile.hip index 9b6087bcc3..f5a0d03608 100644 --- a/lib/gpu/Makefile.hip +++ b/lib/gpu/Makefile.hip @@ -1,6 +1,9 @@ # /* ---------------------------------------------------------------------- # Generic Linux Makefile for HIP -# - export HIP_PLATFORM=amd (or nvcc) before execution +# - export HIP_PATH=/path/to/HIP/install path to the HIP implementation +# such as hipamd or CHIP-SPV. +# - export HIP_PLATFORM= specify the HIP platform to use. +# Optional. If not set, will be determined by ${HIP_PATH}/bin/hipconfig. # - change HIP_ARCH for your GPU # ------------------------------------------------------------------------- */ @@ -20,41 +23,62 @@ HIP_OPTS = -O3 HIP_HOST_OPTS = -Wno-deprecated-declarations -fopenmp HIP_HOST_INCLUDE = +ifndef HIP_PATH +$(error HIP_PATH is not set) +endif + +ifndef HIP_PLATFORM + HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform) +endif +HIP_COMPILER=$(shell $(HIP_PATH)/bin/hipconfig --compiler) + # use device sort # requires linking with hipcc and hipCUB + (rocPRIM or CUB for AMD or Nvidia respectively) +ifneq (spirv,$(HIP_PLATFORM)) +# hipCUB not aviable for CHIP-SPV HIP_HOST_OPTS += -DUSE_HIP_DEVICE_SORT +endif # path to cub HIP_HOST_INCLUDE += -I./ # path to hipcub HIP_HOST_INCLUDE += -I$(HIP_PATH)/../include ifeq (amd,$(HIP_PLATFORM)) - # newer version of ROCm (5.1+) require c++14 for rocprim - HIP_OPTS += -std=c++14 +# newer version of ROCm (5.1+) require c++14 for rocprim +HIP_OPTS += -std=c++14 endif # use mpi HIP_HOST_OPTS += -DMPI_GERYON -DUCL_NO_EXIT # this settings should match LAMMPS Makefile -MPI_COMP_OPTS = $(shell mpicxx --showme:compile) +# automatic flag detection for OpenMPI +ifeq ($(shell mpicxx --showme:compile >/dev/null 2>&1; echo $$?), 0) +MPI_COMP_OPTS = $(shell mpicxx --showme:compile) -DOMPI_SKIP_MPICXX=1 MPI_LINK_OPTS = $(shell mpicxx --showme:link) - -HIP_PATH ?= $(wildcard /opt/rocm/hip) -HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform) -HIP_COMPILER=$(shell $(HIP_PATH)/bin/hipconfig --compiler) +# automatic flag detection for MPICH +else ifeq ($(shell mpicxx -compile_info >/dev/null 2>&1; echo $$?),0) +MPI_COMP_OPTS = $(filter -I%,$(shell mpicxx -compile_info)) -DMPICH_IGNORE_CXX_SEEK +MPI_LINK_OPTS = $(filter -Wl%,$(shell mpicxx -link_info)) $(filter -L%,$(shell mpicxx -link_info)) $(filter -l%,$(shell mpicxx -link_info)) +# for other MPI libs: must set flags manually, if needed +else +MPI_COMP_OPTS = +MPI_LINK_OPTS = +endif ifeq (hcc,$(HIP_PLATFORM)) - # possible values: gfx803,gfx900,gfx906 - HIP_ARCH = gfx906 +# possible values: gfx803,gfx900,gfx906 +HIP_ARCH = gfx906 else ifeq (amd,$(HIP_PLATFORM)) - # possible values: gfx803,gfx900,gfx906 - HIP_ARCH = gfx906 +# possible values: gfx803,gfx900,gfx906 +HIP_ARCH = gfx906 else ifeq (nvcc,$(HIP_PLATFORM)) - HIP_OPTS += --use_fast_math - HIP_ARCH = -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_32,code=[sm_32,compute_32] -gencode arch=compute_35,code=[sm_35,compute_35] \ +HIP_OPTS += --use_fast_math +HIP_ARCH = -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_32,code=[sm_32,compute_32] -gencode arch=compute_35,code=[sm_35,compute_35] \ -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52] -gencode arch=compute_53,code=[sm_53,compute_53]\ -gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] -gencode arch=compute_62,code=[sm_62,compute_62]\ -gencode arch=compute_70,code=[sm_70,compute_70] -gencode arch=compute_72,code=[sm_72,compute_72] -gencode arch=compute_75,code=[sm_75,compute_75] +else ifeq (spirv,$(HIP_PLATFORM)) +HIP_ARCH = spirv endif BIN_DIR = . @@ -71,7 +95,15 @@ BSH = /bin/sh HIP_OPTS += -DUSE_HIP $(HIP_PRECISION) HIP_GPU_OPTS += $(HIP_OPTS) -I./ -ifeq (clang,$(HIP_COMPILER)) +ifeq (spirv,$(HIP_PLATFORM)) + HIP_HOST_OPTS += -fPIC + HIP_GPU_CC = $(HIP_PATH)/bin/hipcc -c + HIP_GPU_OPTS_S = + HIP_GPU_OPTS_E = + HIP_KERNEL_SUFFIX = .cpp + HIP_LIBS_TARGET = + export HCC_AMDGPU_TARGET := $(HIP_ARCH) +else ifeq (clang,$(HIP_COMPILER)) HIP_HOST_OPTS += -fPIC HIP_GPU_CC = $(HIP_PATH)/bin/hipcc --genco HIP_GPU_OPTS_S = --offload-arch=$(HIP_ARCH) diff --git a/lib/gpu/lal_pre_cuda_hip.h b/lib/gpu/lal_pre_cuda_hip.h index 47a005b998..ec666a2863 100644 --- a/lib/gpu/lal_pre_cuda_hip.h +++ b/lib/gpu/lal_pre_cuda_hip.h @@ -30,7 +30,7 @@ // ------------------------------------------------------------------------- -#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) +#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__) #define CONFIG_ID 303 #define SIMD_SIZE 64 #else @@ -112,7 +112,7 @@ // KERNEL MACROS - TEXTURES // ------------------------------------------------------------------------- -#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) +#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__) #define _texture(name, type) __device__ type* name #define _texture_2d(name, type) __device__ type* name #else @@ -134,9 +134,12 @@ int2 qt = tex1Dfetch(q_tex,i); \ ans=__hiloint2double(qt.y, qt.x); \ } + #elseif defined(__HIP_PLATFORM_SPIRV__) + #define fetch4(ans,i,pos_tex) tex1Dfetch(&ans, pos_tex, i); + #define fetch(ans,i,q_tex) tex1Dfetch(&ans, q_tex,i); #else - #define fetch4(ans,i,pos_tex) ans=tex1Dfetch(pos_tex, i); - #define fetch(ans,i,q_tex) ans=tex1Dfetch(q_tex,i); + #define fetch4(ans,i,pos_tex) ans=tex1Dfetch(pos_tex, i); + #define fetch(ans,i,q_tex) ans=tex1Dfetch(q_tex,i); #endif #else #define fetch4(ans,i,x) ans=x[i] @@ -152,7 +155,7 @@ #define mu_tex mu_ #endif -#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) +#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__) #undef fetch4 #undef fetch @@ -209,7 +212,7 @@ #endif #endif -#if defined(CUDA_PRE_NINE) || defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) +#if defined(CUDA_PRE_NINE) || defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__) #ifdef _SINGLE_SINGLE #define shfl_down __shfl_down