From d2c4ade697a39e64419382ed0bf1ac3c17c993a0 Mon Sep 17 00:00:00 2001
From: Richard Berger <richard.berger@temple.edu>
Date: Fri, 10 Apr 2020 17:48:04 -0400
Subject: [PATCH] Add HIP_USE_DEVICE_SORT CMake option

---
 cmake/CMakeLists.txt             |  5 ++++
 cmake/Modules/FindCUB.cmake      | 16 ++++++++++++
 cmake/Modules/Packages/GPU.cmake | 43 ++++++++++++++++++++++++++++++++
 doc/src/Build_extras.rst         | 34 +++++++++++++------------
 4 files changed, 82 insertions(+), 16 deletions(-)
 create mode 100644 cmake/Modules/FindCUB.cmake

diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index ae61a7e5f3..2a48f60f2a 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -736,6 +736,11 @@ if(PKG_GPU)
   elseif(GPU_API STREQUAL "HIP")
     message(STATUS "HIP platform:     ${HIP_PLATFORM}")
     message(STATUS "HIP architecture: ${HIP_ARCH}")
+    if(NOT HIP_USE_DEVICE_SORT)
+      message(STATUS "HIP GPU sorting: off")
+    else()
+      message(STATUS "HIP GPU sorting: on")
+    endif()
   endif()
   message(STATUS "GPU precision:    ${GPU_PREC}")
 endif()
diff --git a/cmake/Modules/FindCUB.cmake b/cmake/Modules/FindCUB.cmake
new file mode 100644
index 0000000000..848e68e815
--- /dev/null
+++ b/cmake/Modules/FindCUB.cmake
@@ -0,0 +1,16 @@
+# - Find CUB
+# Find the CUB header library
+#
+#  CUB_INCLUDE_DIR - include directory containing cub/cub.cuh (cache
+#                    variable; may be preset by the user to skip the search)
+#  CUB_FOUND       - True if CUB found.
+#
+# Look for cub/cub.cuh so the result is the directory *above* cub/.
+find_path(CUB_INCLUDE_DIR cub/cub.cuh)
+
+include(FindPackageHandleStandardArgs)
+# handle the QUIETLY and REQUIRED arguments and set CUB_FOUND to TRUE
+# if all listed variables are TRUE
+find_package_handle_standard_args(CUB DEFAULT_MSG CUB_INCLUDE_DIR)
+
+mark_as_advanced(CUB_INCLUDE_DIR)
diff --git a/cmake/Modules/Packages/GPU.cmake b/cmake/Modules/Packages/GPU.cmake
index 317a698e37..dcfe06b911 100644
--- a/cmake/Modules/Packages/GPU.cmake
+++ b/cmake/Modules/Packages/GPU.cmake
@@ -197,6 +197,7 @@ elseif(GPU_API STREQUAL "HIP")
   endif()
   set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
   find_package(HIP REQUIRED)
+  option(HIP_USE_DEVICE_SORT "Use GPU sorting" ON)  # when ON, gpu gets the hipCUB include path and -DUSE_HIP_DEVICE_SORT
 
   if(NOT DEFINED HIP_PLATFORM)
       if(NOT DEFINED ENV{HIP_PLATFORM})
@@ -284,6 +285,48 @@ elseif(GPU_API STREQUAL "HIP")
   target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -DMPI_GERYON -DUCL_NO_EXIT)
   target_compile_definitions(gpu PRIVATE -DUSE_HIP)
 
+  if(HIP_USE_DEVICE_SORT)
+    # add hipCUB
+    target_include_directories(gpu PRIVATE ${HIP_ROOT_DIR}/../include)
+    target_compile_definitions(gpu PRIVATE -DUSE_HIP_DEVICE_SORT)
+
+    # on the nvcc platform the CUB headers are needed too: locate or download them
+    if(HIP_PLATFORM STREQUAL "nvcc")
+      find_package(CUB)
+      if(CUB_FOUND)
+        set(DOWNLOAD_CUB_DEFAULT OFF)
+      else()
+        set(DOWNLOAD_CUB_DEFAULT ON)
+      endif()
+      option(DOWNLOAD_CUB "Download and compile the CUB library instead of using an already installed one" ${DOWNLOAD_CUB_DEFAULT})
+
+      if(DOWNLOAD_CUB)
+        message(STATUS "CUB download requested")
+        include(ExternalProject)
+        ExternalProject_Add(CUB
+          GIT_REPOSITORY https://github.com/NVlabs/cub
+          TIMEOUT 5
+          PREFIX "${CMAKE_CURRENT_BINARY_DIR}"
+          CONFIGURE_COMMAND ""
+          BUILD_COMMAND ""
+          INSTALL_COMMAND ""
+          UPDATE_COMMAND ""
+        )
+        ExternalProject_get_property(CUB SOURCE_DIR)
+        set(CUB_INCLUDE_DIR ${SOURCE_DIR})
+        # the headers only exist once the clone has run at build time
+        add_dependencies(gpu CUB)
+      else()
+        # not downloading: the find_package(CUB) result above must be usable
+        if(NOT CUB_FOUND)
+          message(FATAL_ERROR "CUB library not found. Help CMake to find it by setting CUB_INCLUDE_DIR, or set DOWNLOAD_CUB=ON to download it")
+        endif()
+      endif()
+
+      target_include_directories(gpu PRIVATE ${CUB_INCLUDE_DIR})
+    endif()
+  endif()
+
   hip_add_executable(hip_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
   target_compile_definitions(hip_get_devices PRIVATE -DUCL_HIP)
 
diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst
index 7f920d1cd8..5ba3aa85f9 100644
--- a/doc/src/Build_extras.rst
+++ b/doc/src/Build_extras.rst
@@ -94,22 +94,24 @@ three different types of backends: OpenCL, CUDA and HIP.
 
 .. code-block:: bash
 
-   -D GPU_API=value          # value = opencl (default) or cuda or hip
-   -D GPU_PREC=value         # precision setting
-                             # value = double or mixed (default) or single
-   -D OCL_TUNE=value         # hardware choice for GPU_API=opencl
-                             # generic (default) or intel (Intel CPU) or fermi, kepler, cypress (NVIDIA)
-   -D GPU_ARCH=value         # primary GPU hardware choice for GPU_API=cuda
-                             # value = sm_XX, see below
-                             # default is sm_30
-   -D HIP_ARCH=value         # primary GPU hardware choice for GPU_API=hip
-                             # value depends on selected HIP_PLATFORM
-                             # default is 'gfx906' for HIP_PLATFORM=hcc and 'sm_30' for HIP_PLATFORM=nvcc
-   -D CUDPP_OPT=value        # optimization setting for GPU_API=cuda
-                             # enables CUDA Performance Primitives Optimizations
-                             # value = yes (default) or no
-   -D CUDA_MPS_SUPPORT=value # enables some tweaks required to run with active nvidia-cuda-mps daemon
-                             # value = yes or no (default)
+   -D GPU_API=value             # value = opencl (default) or cuda or hip
+   -D GPU_PREC=value            # precision setting
+                                # value = double or mixed (default) or single
+   -D OCL_TUNE=value            # hardware choice for GPU_API=opencl
+                                # generic (default) or intel (Intel CPU) or fermi, kepler, cypress (NVIDIA)
+   -D GPU_ARCH=value            # primary GPU hardware choice for GPU_API=cuda
+                                # value = sm_XX, see below
+                                # default is sm_30
+   -D HIP_ARCH=value            # primary GPU hardware choice for GPU_API=hip
+                                # value depends on selected HIP_PLATFORM
+                                # default is 'gfx906' for HIP_PLATFORM=hcc and 'sm_30' for HIP_PLATFORM=nvcc
+   -D HIP_USE_DEVICE_SORT=value # enables GPU sorting for GPU_API=hip
+                                # value = yes (default) or no
+   -D CUDPP_OPT=value           # optimization setting for GPU_API=cuda
+                                # enables CUDA Performance Primitives Optimizations
+                                # value = yes (default) or no
+   -D CUDA_MPS_SUPPORT=value    # enables some tweaks required to run with active nvidia-cuda-mps daemon
+                                # value = yes or no (default)
 
 :code:`GPU_ARCH` settings for different GPU hardware is as follows: