From e6a708d253a53507786025e0f4ab35db7e51bf46 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 28 Jul 2024 20:49:17 -0400 Subject: [PATCH 1/4] add support for AMD Phoenix APUs with HIP --- lib/kokkos/Makefile.kokkos | 8 +++++++- lib/kokkos/cmake/KokkosCore_config.h.in | 1 + lib/kokkos/cmake/kokkos_arch.cmake | 12 +++++++++--- lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp | 3 ++- .../Kokkos_OpenMPTarget_ParallelScan_Range.hpp | 2 +- lib/kokkos/core/src/impl/Kokkos_Core.cpp | 3 +++ lib/kokkos/generate_makefile.bash | 1 + 7 files changed, 24 insertions(+), 6 deletions(-) diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index 14a417b532..2b5507a421 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -23,7 +23,7 @@ KOKKOS_DEVICES ?= "OpenMP" # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90 # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX # IBM: Power8,Power9 -# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100 +# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103 # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3 # Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC KOKKOS_ARCH ?= "" @@ -433,6 +433,7 @@ KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH), ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 0) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100) endif +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1103) # Any AVX? KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)) @@ -1118,6 +1119,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100 endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1103") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1103 +endif ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) diff --git a/lib/kokkos/cmake/KokkosCore_config.h.in b/lib/kokkos/cmake/KokkosCore_config.h.in index 3ab39cd6ab..94f8fc4214 100644 --- a/lib/kokkos/cmake/KokkosCore_config.h.in +++ b/lib/kokkos/cmake/KokkosCore_config.h.in @@ -116,6 +116,7 @@ #cmakedefine KOKKOS_ARCH_AMD_GFX942 #cmakedefine KOKKOS_ARCH_AMD_GFX1030 #cmakedefine KOKKOS_ARCH_AMD_GFX1100 +#cmakedefine KOKKOS_ARCH_AMD_GFX1103 #cmakedefine KOKKOS_ARCH_AMD_GPU #cmakedefine KOKKOS_ARCH_VEGA // deprecated #cmakedefine KOKKOS_ARCH_VEGA906 // deprecated diff --git a/lib/kokkos/cmake/kokkos_arch.cmake b/lib/kokkos/cmake/kokkos_arch.cmake index 34e9f05986..816a532950 100644 --- a/lib/kokkos/cmake/kokkos_arch.cmake +++ b/lib/kokkos/cmake/kokkos_arch.cmake @@ -101,9 +101,9 @@ LIST(APPEND CORRESPONDING_AMD_FLAGS gfx90a gfx90a gfx908 gfx908) LIST(APPEND SUPPORTED_AMD_GPUS MI50/60 MI50/60) LIST(APPEND SUPPORTED_AMD_ARCHS VEGA906 AMD_GFX906) LIST(APPEND CORRESPONDING_AMD_FLAGS gfx906 gfx906) -LIST(APPEND SUPPORTED_AMD_GPUS RX7900XTX RX7900XTX V620/W6800 V620/W6800) -LIST(APPEND SUPPORTED_AMD_ARCHS NAVI1100 AMD_GFX1100 NAVI1030 AMD_GFX1030) -LIST(APPEND CORRESPONDING_AMD_FLAGS gfx1100 gfx1100 gfx1030 gfx1030) +LIST(APPEND SUPPORTED_AMD_GPUS PHOENIX RX7900XTX V620/W6800 V620/W6800) +LIST(APPEND SUPPORTED_AMD_ARCHS AMD_GFX1103 AMD_GFX1100 NAVI1030 AMD_GFX1030) +LIST(APPEND CORRESPONDING_AMD_FLAGS gfx1103 gfx1100 gfx1030 gfx1030) #FIXME CAN BE REPLACED WITH LIST_ZIP IN CMAKE 3.17 FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS) @@ -1045,6 +1045,12 @@ FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS) SET(KOKKOS_ARCH_NAVI1100 ON) BREAK() ENDIF() + STRING(REGEX MATCH "1103" IS_1103 ${ARCH}) + IF(IS_1103) + SET(KOKKOS_ARCH_AMD_GFX1103 ON) + SET(KOKKOS_ARCH_NAVI ON) + BREAK() + ENDIF() STRING(REGEX MATCH "1030" IS_1030 ${ARCH}) IF(IS_1030) SET(KOKKOS_ARCH_AMD_GFX1030 ON) diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp index 142008124a..7b55f519c2 100644 --- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp +++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp @@ -35,7 +35,8 @@ struct HIPTraits { static constexpr int WarpSize = 64; static constexpr int WarpIndexMask = 0x003f; /* hexadecimal for 63 */ static constexpr int WarpIndexShift = 6; /* WarpSize == 1 << WarpShift*/ -#elif defined(KOKKOS_ARCH_AMD_GFX1030) || defined(KOKKOS_ARCH_AMD_GFX1100) +#elif defined(KOKKOS_ARCH_AMD_GFX1030) || defined(KOKKOS_ARCH_AMD_GFX1100) || \ + defined(KOKKOS_ARCH_AMD_GFX1103) static constexpr int WarpSize = 32; static constexpr int WarpIndexMask = 0x001f; /* hexadecimal for 31 */ static constexpr int WarpIndexShift = 5; /* WarpSize == 1 << WarpShift*/ diff --git a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelScan_Range.hpp b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelScan_Range.hpp index c1f7851f41..30195d96e0 100644 --- a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelScan_Range.hpp +++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelScan_Range.hpp @@ -143,7 +143,7 @@ class ParallelScan, local_offset_value = element_values(team_id, i - 1); // FIXME_OPENMPTARGET We seem to access memory illegaly on AMD GPUs #if defined(KOKKOS_ARCH_AMD_GPU) && !defined(KOKKOS_ARCH_AMD_GFX1030) && \ - !defined(KOKKOS_ARCH_AMD_GFX1100) + !defined(KOKKOS_ARCH_AMD_GFX1100) && !defined(KOKKOS_ARCH_AMD_GFX1103) if constexpr (Analysis::Reducer::has_join_member_function()) { if constexpr (std::is_void_v) a_functor_reducer.get_functor().join(local_offset_value, diff --git a/lib/kokkos/core/src/impl/Kokkos_Core.cpp b/lib/kokkos/core/src/impl/Kokkos_Core.cpp index 4a69652616..c7addbe337 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Core.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Core.cpp @@ -750,6 +750,9 @@ void pre_initialize_internal(const Kokkos::InitializationSettings& settings) { #elif defined(KOKKOS_ARCH_AMD_GFX1100) declare_configuration_metadata("architecture", "GPU architecture", "AMD_GFX1100"); +#elif defined(KOKKOS_ARCH_AMD_GFX1103) + declare_configuration_metadata("architecture", "GPU architecture", + "AMD_GFX1103"); #else declare_configuration_metadata("architecture", "GPU architecture", "none"); diff --git a/lib/kokkos/generate_makefile.bash b/lib/kokkos/generate_makefile.bash index 25370daa3f..70dd61f9af 100755 --- a/lib/kokkos/generate_makefile.bash +++ b/lib/kokkos/generate_makefile.bash @@ -164,6 +164,7 @@ display_help_text() { echo " AMD_GFX942 = AMD GPU MI300 GFX942" echo " AMD_GFX1030 = AMD GPU V620/W6800 GFX1030" echo " AMD_GFX1100 = AMD GPU RX 7900 XT(X) GFX1100" + echo " AMD_GFX1103 = AMD APU Radeon 740M/760M/780M/880M/890M GFX1103" echo " [ARM]" echo " ARMV80 = ARMv8.0 Compatible CPU" echo " ARMV81 = ARMv8.1 Compatible CPU" From 41254b26c77860853ce6ce37c1dc153771174448 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 29 Jul 2024 17:01:34 -0400 Subject: [PATCH 2/4] backward compatibility not needed --- lib/kokkos/cmake/kokkos_arch.cmake | 6 ------ 1 file changed, 6 deletions(-) diff --git a/lib/kokkos/cmake/kokkos_arch.cmake b/lib/kokkos/cmake/kokkos_arch.cmake index 816a532950..df11c76cc3 100644 --- a/lib/kokkos/cmake/kokkos_arch.cmake +++ b/lib/kokkos/cmake/kokkos_arch.cmake @@ -1045,12 +1045,6 @@ FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS) SET(KOKKOS_ARCH_NAVI1100 ON) BREAK() ENDIF() - STRING(REGEX MATCH "1103" IS_1103 ${ARCH}) - IF(IS_1103) - SET(KOKKOS_ARCH_AMD_GFX1103 ON) - SET(KOKKOS_ARCH_NAVI ON) - BREAK() - ENDIF() STRING(REGEX MATCH "1030" IS_1030 ${ARCH}) IF(IS_1030) SET(KOKKOS_ARCH_AMD_GFX1030 ON) From e1f17cec7d709fecbd133540f0098491f9b829f6 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 29 Jul 2024 17:11:51 -0400 Subject: [PATCH 3/4] update list of supported devices --- doc/src/Build_extras.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst index 4802c67420..eae247d66a 100644 --- a/doc/src/Build_extras.rst +++ b/doc/src/Build_extras.rst @@ -639,6 +639,9 @@ They must be specified in uppercase. * - AMD_GFX1100 - GPU - AMD GPU RX7900XTX + * - AMD_GFX1103 + - GPU + - AMD Phoenix APU with Radeon 740M/760M/780M/880M/890M * - INTEL_GEN - GPU - SPIR64-based devices, e.g. Intel GPUs, using JIT From cb6e71b72f399d28e135e44d2575f18ee96cfed7 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 30 Jul 2024 08:46:01 -0400 Subject: [PATCH 4/4] fix typo --- lib/kokkos/Makefile.kokkos | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index 2b5507a421..6fdddd9a53 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -433,7 +433,7 @@ KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH), ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 0) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100) endif -KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1103) +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1103) # Any AVX? KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))