Merge pull request #3365 from pvelesko/chip_spv

Add CMake and GNU make support for HIP via CHIP-SPV
2022-07-29 06:33:55 -04:00 · 2022-07-29 06:33:55 -04:00 · 2ce33c14b7
parent 8076624477 5fee276348
commit 2ce33c14b7
4 changed files with 87 additions and 23 deletions
--- a/cmake/Modules/Packages/GPU.cmake
+++ b/cmake/Modules/Packages/GPU.cmake
@ -233,7 +233,8 @@ elseif(GPU_API STREQUAL "OPENCL")
 elseif(GPU_API STREQUAL "HIP")
  if(NOT DEFINED HIP_PATH)
      if(NOT DEFINED ENV{HIP_PATH})
-          set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to HIP installation")
+          message(FATAL_ERROR "GPU_API=HIP requires HIP_PATH to be defined.\n"
+          "Either pass the HIP_PATH as a CMake option via -DHIP_PATH=... or set the HIP_PATH environment variable.")
      else()
          set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to HIP installation")
      endif()
@ -261,6 +262,8 @@ elseif(GPU_API STREQUAL "HIP")

  if(HIP_PLATFORM STREQUAL "hcc" OR HIP_PLATFORM STREQUAL "amd")
    set(HIP_ARCH "gfx906" CACHE STRING "HIP target architecture")
+  elseif(HIP_PLATFORM STREQUAL "spirv")
+    set(HIP_ARCH "spirv" CACHE STRING "HIP target architecture")
  elseif(HIP_PLATFORM STREQUAL "nvcc")
    find_package(CUDA REQUIRED)
    set(HIP_ARCH "sm_50" CACHE STRING "HIP primary CUDA architecture (e.g. sm_60)")
@ -340,7 +343,14 @@ elseif(GPU_API STREQUAL "HIP")
          VERBATIM COMMAND ${HIP_HIPCC_EXECUTABLE} --fatbin --use_fast_math -DUSE_HIP -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES} ${HIP_CUDA_GENCODE} -I${LAMMPS_LIB_SOURCE_DIR}/gpu -o ${CUBIN_FILE} ${CU_FILE}
          DEPENDS ${CU_FILE}
          COMMENT "Generating ${CU_NAME}.cubin")
-    endif()
+    elseif(HIP_PLATFORM STREQUAL "spirv")
+      configure_file(${CU_FILE} ${CU_CPP_FILE} COPYONLY)
+
+      add_custom_command(OUTPUT ${CUBIN_FILE}
+        VERBATIM COMMAND ${HIP_HIPCC_EXECUTABLE} -c -O3 -DUSE_HIP -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES} -I${LAMMPS_LIB_SOURCE_DIR}/gpu -o ${CUBIN_FILE} ${CU_CPP_FILE}
+        DEPENDS ${CU_CPP_FILE}
+        COMMENT "Gerating ${CU_NAME}.cubin")
+      endif()

    add_custom_command(OUTPUT ${CUBIN_H_FILE}
      COMMAND ${CMAKE_COMMAND} -D SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR} -D VARNAME=${CU_NAME} -D HEADER_FILE=${CUBIN_H_FILE} -D SOURCE_FILE=${CUBIN_FILE} -P ${CMAKE_CURRENT_SOURCE_DIR}/Modules/GenerateBinaryHeader.cmake
--- a/doc/src/Build_extras.rst
+++ b/doc/src/Build_extras.rst
@ -123,6 +123,7 @@ CMake build
   -D GPU_API=value             # value = opencl (default) or cuda or hip
   -D GPU_PREC=value            # precision setting
                                # value = double or mixed (default) or single
+   -D HIP_PATH                  # path to HIP installation. Must be set if GPU_API=HIP
   -D GPU_ARCH=value            # primary GPU hardware choice for GPU_API=cuda
                                # value = sm_XX, see below
                                # default is sm_50
@ -179,10 +180,17 @@ set appropriate environment variables. Some variables such as
 :code:`HCC_AMDGPU_TARGET` (for ROCm <= 4.0) or :code:`CUDA_PATH` are necessary for :code:`hipcc`
 and the linker to work correctly.

+Using CHIP-SPV implementation of HIP is now supported. It allows one to run HIP
+code on Intel GPUs via the OpenCL or Level Zero backends. To use CHIP-SPV, you must 
+set :code:`-DHIP_USE_DEVICE_SORT=OFF` in your CMake command line as CHIP-SPV does not 
+yet support hipCUB. The use of HIP for Intel GPUs is still experimental so you 
+should only use this option in preparations to run on Aurora system at ANL. 
+
 .. code:: bash

   # AMDGPU target (ROCm <= 4.0)
   export HIP_PLATFORM=hcc
+   export HIP_PATH=/path/to/HIP/install
   export HCC_AMDGPU_TARGET=gfx906
   cmake -D PKG_GPU=on -D GPU_API=HIP -D HIP_ARCH=gfx906 -D CMAKE_CXX_COMPILER=hipcc ..
   make -j 4
@ -191,6 +199,7 @@ and the linker to work correctly.

   # AMDGPU target (ROCm >= 4.1)
   export HIP_PLATFORM=amd
+   export HIP_PATH=/path/to/HIP/install
   cmake -D PKG_GPU=on -D GPU_API=HIP -D HIP_ARCH=gfx906 -D CMAKE_CXX_COMPILER=hipcc ..
   make -j 4

@ -199,10 +208,20 @@ and the linker to work correctly.
   # CUDA target (not recommended, use GPU_ARCH=cuda)
   # !!! DO NOT set CMAKE_CXX_COMPILER !!!
   export HIP_PLATFORM=nvcc
+   export HIP_PATH=/path/to/HIP/install
   export CUDA_PATH=/usr/local/cuda
   cmake -D PKG_GPU=on -D GPU_API=HIP -D HIP_ARCH=sm_70 ..
   make -j 4

+.. code:: bash
+
+   # SPIR-V target (Intel GPUs)
+   export HIP_PLATFORM=spirv
+   export HIP_PATH=/path/to/HIP/install
+   export CMAKE_CXX_COMPILER=<hipcc/clang++>
+   cmake -D PKG_GPU=on -D GPU_API=HIP ..
+   make -j 4
+
 Traditional make
 ^^^^^^^^^^^^^^^^

--- a/lib/gpu/Makefile.hip
+++ b/lib/gpu/Makefile.hip
@ -1,6 +1,9 @@
 # /* ----------------------------------------------------------------------
 #  Generic Linux Makefile for HIP
-#     - export HIP_PLATFORM=amd (or nvcc) before execution
+#     - export HIP_PATH=/path/to/HIP/install path to the HIP implementation
+#       such as hipamd or CHIP-SPV.
+#     - export HIP_PLATFORM=<amd/nvcc/spirv> specify the HIP platform to use.
+#       Optional. If not set, will be determined by ${HIP_PATH}/bin/hipconfig.
 #     - change HIP_ARCH for your GPU
 # ------------------------------------------------------------------------- */

@ -20,41 +23,62 @@ HIP_OPTS = -O3
 HIP_HOST_OPTS = -Wno-deprecated-declarations -fopenmp
 HIP_HOST_INCLUDE =

+ifndef HIP_PATH
+$(error HIP_PATH is not set)
+endif
+
+ifndef HIP_PLATFORM
+	HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform)
+endif
+HIP_COMPILER=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
+
 # use device sort
 # requires linking with hipcc and hipCUB + (rocPRIM or CUB for AMD or Nvidia respectively)
+ifneq (spirv,$(HIP_PLATFORM))
+# hipCUB not aviable for CHIP-SPV
 HIP_HOST_OPTS += -DUSE_HIP_DEVICE_SORT
+endif
 # path to cub
 HIP_HOST_INCLUDE += -I./
 # path to hipcub
 HIP_HOST_INCLUDE += -I$(HIP_PATH)/../include

 ifeq (amd,$(HIP_PLATFORM))
-	# newer version of ROCm (5.1+) require c++14 for rocprim
-	HIP_OPTS += -std=c++14
+# newer version of ROCm (5.1+) require c++14 for rocprim
+HIP_OPTS += -std=c++14
 endif

 # use mpi
 HIP_HOST_OPTS += -DMPI_GERYON -DUCL_NO_EXIT
 # this settings should match LAMMPS Makefile
-MPI_COMP_OPTS = $(shell mpicxx --showme:compile)
+# automatic flag detection for OpenMPI
+ifeq ($(shell mpicxx --showme:compile >/dev/null 2>&1; echo $$?), 0)
+MPI_COMP_OPTS = $(shell mpicxx --showme:compile) -DOMPI_SKIP_MPICXX=1
 MPI_LINK_OPTS = $(shell mpicxx --showme:link)
-
-HIP_PATH ?= $(wildcard /opt/rocm/hip)
-HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform)
-HIP_COMPILER=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
+# automatic flag detection for MPICH
+else ifeq ($(shell mpicxx -compile_info >/dev/null 2>&1; echo $$?),0)
+MPI_COMP_OPTS = $(filter -I%,$(shell mpicxx -compile_info)) -DMPICH_IGNORE_CXX_SEEK
+MPI_LINK_OPTS = $(filter -Wl%,$(shell mpicxx -link_info)) $(filter -L%,$(shell mpicxx -link_info)) $(filter -l%,$(shell mpicxx -link_info))
+# for other MPI libs: must set flags manually, if needed
+else
+MPI_COMP_OPTS =
+MPI_LINK_OPTS =
+endif

 ifeq (hcc,$(HIP_PLATFORM))
-	# possible values: gfx803,gfx900,gfx906
-	HIP_ARCH = gfx906
+# possible values: gfx803,gfx900,gfx906
+HIP_ARCH = gfx906
 else ifeq (amd,$(HIP_PLATFORM))
-	# possible values: gfx803,gfx900,gfx906
-	HIP_ARCH = gfx906
+# possible values: gfx803,gfx900,gfx906
+HIP_ARCH = gfx906
 else ifeq (nvcc,$(HIP_PLATFORM))
-	HIP_OPTS  += --use_fast_math
-	HIP_ARCH = -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_32,code=[sm_32,compute_32] -gencode arch=compute_35,code=[sm_35,compute_35] \
+HIP_OPTS  += --use_fast_math
+HIP_ARCH = -gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_32,code=[sm_32,compute_32] -gencode arch=compute_35,code=[sm_35,compute_35] \
 		    -gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52] -gencode arch=compute_53,code=[sm_53,compute_53]\
 			-gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] -gencode arch=compute_62,code=[sm_62,compute_62]\
 			-gencode arch=compute_70,code=[sm_70,compute_70] -gencode arch=compute_72,code=[sm_72,compute_72] -gencode arch=compute_75,code=[sm_75,compute_75]
+else ifeq (spirv,$(HIP_PLATFORM))
+HIP_ARCH = spirv
 endif

 BIN_DIR = .
@ -71,7 +95,15 @@ BSH = /bin/sh
 HIP_OPTS += -DUSE_HIP $(HIP_PRECISION)
 HIP_GPU_OPTS += $(HIP_OPTS) -I./

-ifeq (clang,$(HIP_COMPILER))
+ifeq (spirv,$(HIP_PLATFORM))
+	HIP_HOST_OPTS += -fPIC
+	HIP_GPU_CC  = $(HIP_PATH)/bin/hipcc -c
+	HIP_GPU_OPTS_S = 
+	HIP_GPU_OPTS_E =
+	HIP_KERNEL_SUFFIX = .cpp
+	HIP_LIBS_TARGET = 
+	export HCC_AMDGPU_TARGET := $(HIP_ARCH)
+else ifeq (clang,$(HIP_COMPILER))
 	HIP_HOST_OPTS += -fPIC
 	HIP_GPU_CC  = $(HIP_PATH)/bin/hipcc --genco
 	HIP_GPU_OPTS_S = --offload-arch=$(HIP_ARCH)
--- a/lib/gpu/lal_pre_cuda_hip.h
+++ b/lib/gpu/lal_pre_cuda_hip.h
@ -30,7 +30,7 @@
 // -------------------------------------------------------------------------


-#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
+#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__)
 #define CONFIG_ID 303
 #define SIMD_SIZE 64
 #else
@ -112,7 +112,7 @@
 //                         KERNEL MACROS - TEXTURES
 // -------------------------------------------------------------------------

-#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
+#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__)
 #define _texture(name, type)  __device__ type* name
 #define _texture_2d(name, type)  __device__ type* name
 #else
@ -134,9 +134,12 @@
    int2 qt = tex1Dfetch(q_tex,i);                       \
    ans=__hiloint2double(qt.y, qt.x);                    \
  }
+  #elseif  defined(__HIP_PLATFORM_SPIRV__)
+      #define fetch4(ans,i,pos_tex) tex1Dfetch(&ans, pos_tex, i);
+      #define fetch(ans,i,q_tex) tex1Dfetch(&ans, q_tex,i);
  #else
-  #define fetch4(ans,i,pos_tex) ans=tex1Dfetch(pos_tex, i);
-  #define fetch(ans,i,q_tex) ans=tex1Dfetch(q_tex,i);
+    #define fetch4(ans,i,pos_tex) ans=tex1Dfetch(pos_tex, i);
+    #define fetch(ans,i,q_tex) ans=tex1Dfetch(q_tex,i);
  #endif
 #else
  #define fetch4(ans,i,x) ans=x[i]
@ -152,7 +155,7 @@
  #define mu_tex mu_
 #endif

-#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
+#if defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__)

 #undef fetch4
 #undef fetch
@ -209,7 +212,7 @@
 #endif
 #endif

-#if defined(CUDA_PRE_NINE) || defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)
+#if defined(CUDA_PRE_NINE) || defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_SPIRV__)

  #ifdef _SINGLE_SINGLE
    #define shfl_down __shfl_down