Merge pull request #3365 from pvelesko/chip_spv

Add CMake and GNU make support for HIP via CHIP-SPV
This commit is contained in:
Axel Kohlmeyer 2022-07-29 06:33:55 -04:00 committed by GitHub
commit 2ce33c14b7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 87 additions and 23 deletions

View File

@ -233,7 +233,8 @@ elseif(GPU_API STREQUAL "OPENCL")
elseif(GPU_API STREQUAL "HIP")
if(NOT DEFINED HIP_PATH)
if(NOT DEFINED ENV{HIP_PATH})
set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to HIP installation")
message(FATAL_ERROR "GPU_API=HIP requires HIP_PATH to be defined.\n"
"Either pass the HIP_PATH as a CMake option via -DHIP_PATH=... or set the HIP_PATH environment variable.")
else()
set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to HIP installation")
endif()
@ -261,6 +262,8 @@ elseif(GPU_API STREQUAL "HIP")
if(HIP_PLATFORM STREQUAL "hcc" OR HIP_PLATFORM STREQUAL "amd")
set(HIP_ARCH "gfx906" CACHE STRING "HIP target architecture")
elseif(HIP_PLATFORM STREQUAL "spirv")
set(HIP_ARCH "spirv" CACHE STRING "HIP target architecture")
elseif(HIP_PLATFORM STREQUAL "nvcc")
find_package(CUDA REQUIRED)
set(HIP_ARCH "sm_50" CACHE STRING "HIP primary CUDA architecture (e.g. sm_60)")
@ -340,7 +343,14 @@ elseif(GPU_API STREQUAL "HIP")
VERBATIM COMMAND ${HIP_HIPCC_EXECUTABLE} --fatbin --use_fast_math -DUSE_HIP -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES} ${HIP_CUDA_GENCODE} -I${LAMMPS_LIB_SOURCE_DIR}/gpu -o ${CUBIN_FILE} ${CU_FILE}
DEPENDS ${CU_FILE}
COMMENT "Generating ${CU_NAME}.cubin")
endif()
elseif(HIP_PLATFORM STREQUAL "spirv")
configure_file(${CU_FILE} ${CU_CPP_FILE} COPYONLY)
add_custom_command(OUTPUT ${CUBIN_FILE}
VERBATIM COMMAND ${HIP_HIPCC_EXECUTABLE} -c -O3 -DUSE_HIP -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES} -I${LAMMPS_LIB_SOURCE_DIR}/gpu -o ${CUBIN_FILE} ${CU_CPP_FILE}
DEPENDS ${CU_CPP_FILE}
COMMENT "Gerating ${CU_NAME}.cubin")
endif()
add_custom_command(OUTPUT ${CUBIN_H_FILE}
COMMAND ${CMAKE_COMMAND} -D SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR} -D VARNAME=${CU_NAME} -D HEADER_FILE=${CUBIN_H_FILE} -D SOURCE_FILE=${CUBIN_FILE} -P ${CMAKE_CURRENT_SOURCE_DIR}/Modules/GenerateBinaryHeader.cmake

View File

@ -123,6 +123,7 @@ CMake build
-D GPU_API=value # value = opencl (default) or cuda or hip
-D GPU_PREC=value # precision setting
# value = double or mixed (default) or single
-D HIP_PATH # path to HIP installation. Must be set if GPU_API=HIP
-D GPU_ARCH=value # primary GPU hardware choice for GPU_API=cuda
# value = sm_XX, see below
# default is sm_50
@ -179,10 +180,17 @@ set appropriate environment variables. Some variables such as
:code:`HCC_AMDGPU_TARGET` (for ROCm <= 4.0) or :code:`CUDA_PATH` are necessary for :code:`hipcc`
and the linker to work correctly.
Using CHIP-SPV implementation of HIP is now supported. It allows one to run HIP
code on Intel GPUs via the OpenCL or Level Zero backends. To use CHIP-SPV, you must
set :code:`-DHIP_USE_DEVICE_SORT=OFF` in your CMake command line as CHIP-SPV does not
yet support hipCUB. The use of HIP for Intel GPUs is still experimental so you
should only use this option in preparations to run on Aurora system at ANL.
.. code:: bash
# AMDGPU target (ROCm <= 4.0)
export HIP_PLATFORM=hcc
export HIP_PATH=/path/to/HIP/install
export HCC_AMDGPU_TARGET=gfx906
cmake -D PKG_GPU=on -D GPU_API=HIP -D HIP_ARCH=gfx906 -D CMAKE_CXX_COMPILER=hipcc ..
make -j 4
@ -191,6 +199,7 @@ and the linker to work correctly.
# AMDGPU target (ROCm >= 4.1)
export HIP_PLATFORM=amd
export HIP_PATH=/path/to/HIP/install
cmake -D PKG_GPU=on -D GPU_API=HIP -D HIP_ARCH=gfx906 -D CMAKE_CXX_COMPILER=hipcc ..
make -j 4
@ -199,10 +208,20 @@ and the linker to work correctly.
# CUDA target (not recommended, use GPU_ARCH=cuda)
# !!! DO NOT set CMAKE_CXX_COMPILER !!!
export HIP_PLATFORM=nvcc
export HIP_PATH=/path/to/HIP/install
export CUDA_PATH=/usr/local/cuda
cmake -D PKG_GPU=on -D GPU_API=HIP -D HIP_ARCH=sm_70 ..
make -j 4
.. code:: bash
# SPIR-V target (Intel GPUs)
export HIP_PLATFORM=spirv
export HIP_PATH=/path/to/HIP/install
export CMAKE_CXX_COMPILER=<hipcc/clang++>
cmake -D PKG_GPU=on -D GPU_API=HIP ..
make -j 4
Traditional make
^^^^^^^^^^^^^^^^

View File

@ -1,6 +1,9 @@
# /* ----------------------------------------------------------------------
# Generic Linux Makefile for HIP
# - export HIP_PLATFORM=amd (or nvcc) before execution
# - export HIP_PATH=/path/to/HIP/install path to the HIP implementation
# such as hipamd or CHIP-SPV.
# - export HIP_PLATFORM=<amd/nvcc/spirv> specify the HIP platform to use.
# Optional. If not set, will be determined by ${HIP_PATH}/bin/hipconfig.
# - change HIP_ARCH for your GPU
# ------------------------------------------------------------------------- */
@ -20,41 +23,62 @@ HIP_OPTS = -O3
HIP_HOST_OPTS = -Wno-deprecated-declarations -fopenmp
HIP_HOST_INCLUDE =
ifndef HIP_PATH
$(error HIP_PATH is not set)
endif
ifndef HIP_PLATFORM
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform)
endif
HIP_COMPILER=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
# use device sort
# requires linking with hipcc and hipCUB + (rocPRIM or CUB for AMD or Nvidia respectively)
ifneq (spirv,$(HIP_PLATFORM))
# hipCUB not aviable for CHIP-SPV
HIP_HOST_OPTS += -DUSE_HIP_DEVICE_SORT
endif
# path to cub
HIP_HOST_INCLUDE += -I./
# path to hipcub
HIP_HOST_INCLUDE += -I$(HIP_PATH)/../include
ifeq (amd,$(HIP_PLATFORM))
# newer version of ROCm (5.1+) require c++14 for rocprim
HIP_OPTS += -std=c++14
# newer version of ROCm (5.1+) require c++14 for rocprim
HIP_OPTS += -std=c++14
endif
# use mpi
HIP_HOST_OPTS += -DMPI_GERYON -DUCL_NO_EXIT
# this settings should match LAMMPS Makefile
MPI_COMP_OPTS = $(shell mpicxx --showme:compile)
# automatic flag detection for OpenMPI
ifeq ($(shell mpicxx --showme:compile >/dev/null 2>&1; echo $$?), 0)
MPI_COMP_OPTS = $(shell mpicxx --showme:compile) -DOMPI_SKIP_MPICXX=1
MPI_LINK_OPTS = $(shell mpicxx --showme:link)
HIP_PATH ?= $(wildcard /opt/rocm/hip)
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform)
HIP_COMPILER=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
# automatic flag detection for MPICH
else ifeq ($(shell mpicxx -compile_info >/dev/null 2>&1; echo $$?),0)
MPI_COMP_OPTS = $(filter -I%,$(shell mpicxx -compile_info)) -DMPICH_IGNORE_CXX_SEEK
MPI_LINK_OPTS = $(filter -Wl%,$(shell mpicxx -link_info)) $(filter -L%,$(shell mpicxx -link_info)) $(filter -l%,$(shell mpicxx -link_info))
# for other MPI libs: must set flags manually, if needed
else
MPI_COMP_OPTS =
MPI_LINK_OPTS =
endif
ifeq (hcc,$(HIP_PLATFORM))
# possible values: gfx803,gfx900,gfx906
HIP_ARCH = gfx906
# possible values: gfx803,gfx900,gfx906
HIP_ARCH = gfx906
else ifeq (amd,$(HIP_PLATFORM))
# possible values: gfx803,gfx900,gfx906
HIP_ARCH = gfx906
# possible values: gfx803,gfx900,gfx906
HIP_ARCH = gfx906
else ifeq (nvcc,$(HIP_PLATFORM))
HIP_OPTS += --use_fast_math
HIP_ARCH = -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_32,code=[sm_32,compute_32] -gencode arch=compute_35,code=[sm_35,compute_35] \
HIP_OPTS += --use_fast_math
HIP_ARCH = -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_32,code=[sm_32,compute_32] -gencode arch=compute_35,code=[sm_35,compute_35] \
-gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52] -gencode arch=compute_53,code=[sm_53,compute_53]\
-gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] -gencode arch=compute_62,code=[sm_62,compute_62]\
-gencode arch=compute_70,code=[sm_70,compute_70] -gencode arch=compute_72,code=[sm_72,compute_72] -gencode arch=compute_75,code=[sm_75,compute_75]
else ifeq (spirv,$(HIP_PLATFORM))
HIP_ARCH = spirv
endif
BIN_DIR = .
@ -71,7 +95,15 @@ BSH = /bin/sh
HIP_OPTS += -DUSE_HIP $(HIP_PRECISION)
HIP_GPU_OPTS += $(HIP_OPTS) -I./
ifeq (clang,$(HIP_COMPILER))
ifeq (spirv,$(HIP_PLATFORM))
HIP_HOST_OPTS += -fPIC
HIP_GPU_CC = $(HIP_PATH)/bin/hipcc -c
HIP_GPU_OPTS_S =
HIP_GPU_OPTS_E =
HIP_KERNEL_SUFFIX = .cpp
HIP_LIBS_TARGET =
export HCC_AMDGPU_TARGET := $(HIP_ARCH)
else ifeq (clang,$(HIP_COMPILER))
HIP_HOST_OPTS += -fPIC
HIP_GPU_CC = $(HIP_PATH)/bin/hipcc --genco
HIP_GPU_OPTS_S = --offload-arch=$(HIP_ARCH)

View File

@ -30,7 +30,7 @@
// -------------------------------------------------------------------------
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__)
#define CONFIG_ID 303
#define SIMD_SIZE 64
#else
@ -112,7 +112,7 @@
// KERNEL MACROS - TEXTURES
// -------------------------------------------------------------------------
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__)
#define _texture(name, type) __device__ type* name
#define _texture_2d(name, type) __device__ type* name
#else
@ -134,9 +134,12 @@
int2 qt = tex1Dfetch(q_tex,i); \
ans=__hiloint2double(qt.y, qt.x); \
}
#elseif defined(__HIP_PLATFORM_SPIRV__)
#define fetch4(ans,i,pos_tex) tex1Dfetch(&ans, pos_tex, i);
#define fetch(ans,i,q_tex) tex1Dfetch(&ans, q_tex,i);
#else
#define fetch4(ans,i,pos_tex) ans=tex1Dfetch(pos_tex, i);
#define fetch(ans,i,q_tex) ans=tex1Dfetch(q_tex,i);
#define fetch4(ans,i,pos_tex) ans=tex1Dfetch(pos_tex, i);
#define fetch(ans,i,q_tex) ans=tex1Dfetch(q_tex,i);
#endif
#else
#define fetch4(ans,i,x) ans=x[i]
@ -152,7 +155,7 @@
#define mu_tex mu_
#endif
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__)
#undef fetch4
#undef fetch
@ -209,7 +212,7 @@
#endif
#endif
#if defined(CUDA_PRE_NINE) || defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
#if defined(CUDA_PRE_NINE) || defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__)
#ifdef _SINGLE_SINGLE
#define shfl_down __shfl_down