Merge branch 'kokkos-amd-gfx1103-support' into collected-small-fixes

This commit is contained in:
Axel Kohlmeyer 2024-07-30 08:46:22 -04:00
commit 208216ba3d
No known key found for this signature in database
GPG Key ID: D9B44E93BF0C375A
8 changed files with 21 additions and 6 deletions

View File

@ -639,6 +639,9 @@ They must be specified in uppercase.
* - AMD_GFX1100
- GPU
- AMD GPU RX7900XTX
* - AMD_GFX1103
- GPU
- AMD Phoenix APU with Radeon 740M/760M/780M/880M/890M
* - INTEL_GEN
- GPU
- SPIR64-based devices, e.g. Intel GPUs, using JIT

View File

@ -23,7 +23,7 @@ KOKKOS_DEVICES ?= "OpenMP"
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
# IBM: Power8,Power9
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC
KOKKOS_ARCH ?= ""
@ -433,6 +433,7 @@ KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100)
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1103)
# Any AVX?
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
@ -1118,6 +1119,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1103")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1103
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)

View File

@ -116,6 +116,7 @@
#cmakedefine KOKKOS_ARCH_AMD_GFX942
#cmakedefine KOKKOS_ARCH_AMD_GFX1030
#cmakedefine KOKKOS_ARCH_AMD_GFX1100
#cmakedefine KOKKOS_ARCH_AMD_GFX1103
#cmakedefine KOKKOS_ARCH_AMD_GPU
#cmakedefine KOKKOS_ARCH_VEGA // deprecated
#cmakedefine KOKKOS_ARCH_VEGA906 // deprecated

View File

@ -101,9 +101,9 @@ LIST(APPEND CORRESPONDING_AMD_FLAGS gfx90a gfx90a gfx908 gfx908)
LIST(APPEND SUPPORTED_AMD_GPUS MI50/60 MI50/60)
LIST(APPEND SUPPORTED_AMD_ARCHS VEGA906 AMD_GFX906)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx906 gfx906)
LIST(APPEND SUPPORTED_AMD_GPUS RX7900XTX RX7900XTX V620/W6800 V620/W6800)
LIST(APPEND SUPPORTED_AMD_ARCHS NAVI1100 AMD_GFX1100 NAVI1030 AMD_GFX1030)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx1100 gfx1100 gfx1030 gfx1030)
LIST(APPEND SUPPORTED_AMD_GPUS PHOENIX RX7900XTX V620/W6800 V620/W6800)
LIST(APPEND SUPPORTED_AMD_ARCHS AMD_GFX1103 AMD_GFX1100 NAVI1030 AMD_GFX1030)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx1103 gfx1100 gfx1030 gfx1030)
#FIXME CAN BE REPLACED WITH LIST_ZIP IN CMAKE 3.17
FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS)

View File

@ -35,7 +35,8 @@ struct HIPTraits {
static constexpr int WarpSize = 64;
static constexpr int WarpIndexMask = 0x003f; /* hexadecimal for 63 */
static constexpr int WarpIndexShift = 6; /* WarpSize == 1 << WarpShift*/
#elif defined(KOKKOS_ARCH_AMD_GFX1030) || defined(KOKKOS_ARCH_AMD_GFX1100)
#elif defined(KOKKOS_ARCH_AMD_GFX1030) || defined(KOKKOS_ARCH_AMD_GFX1100) || \
defined(KOKKOS_ARCH_AMD_GFX1103)
static constexpr int WarpSize = 32;
static constexpr int WarpIndexMask = 0x001f; /* hexadecimal for 31 */
static constexpr int WarpIndexShift = 5; /* WarpSize == 1 << WarpShift*/

View File

@ -143,7 +143,7 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>,
local_offset_value = element_values(team_id, i - 1);
// FIXME_OPENMPTARGET We seem to access memory illegaly on AMD GPUs
#if defined(KOKKOS_ARCH_AMD_GPU) && !defined(KOKKOS_ARCH_AMD_GFX1030) && \
!defined(KOKKOS_ARCH_AMD_GFX1100)
!defined(KOKKOS_ARCH_AMD_GFX1100) && !defined(KOKKOS_ARCH_AMD_GFX1103)
if constexpr (Analysis::Reducer::has_join_member_function()) {
if constexpr (std::is_void_v<WorkTag>)
a_functor_reducer.get_functor().join(local_offset_value,

View File

@ -750,6 +750,9 @@ void pre_initialize_internal(const Kokkos::InitializationSettings& settings) {
#elif defined(KOKKOS_ARCH_AMD_GFX1100)
declare_configuration_metadata("architecture", "GPU architecture",
"AMD_GFX1100");
#elif defined(KOKKOS_ARCH_AMD_GFX1103)
declare_configuration_metadata("architecture", "GPU architecture",
"AMD_GFX1103");
#else
declare_configuration_metadata("architecture", "GPU architecture", "none");

View File

@ -164,6 +164,7 @@ display_help_text() {
echo " AMD_GFX942 = AMD GPU MI300 GFX942"
echo " AMD_GFX1030 = AMD GPU V620/W6800 GFX1030"
echo " AMD_GFX1100 = AMD GPU RX 7900 XT(X) GFX1100"
echo " AMD_GFX1103 = AMD APU Radeon 740M/760M/780M/880M/890M GFX1103"
echo " [ARM]"
echo " ARMV80 = ARMv8.0 Compatible CPU"
echo " ARMV81 = ARMv8.1 Compatible CPU"