forked from lijiext/lammps
Merge branch 'kokkos-amd-gfx1103-support' into collected-small-fixes
This commit is contained in:
commit
208216ba3d
|
@ -639,6 +639,9 @@ They must be specified in uppercase.
|
|||
* - AMD_GFX1100
|
||||
- GPU
|
||||
- AMD GPU RX7900XTX
|
||||
* - AMD_GFX1103
|
||||
- GPU
|
||||
- AMD Phoenix APU with Radeon 740M/760M/780M/880M/890M
|
||||
* - INTEL_GEN
|
||||
- GPU
|
||||
- SPIR64-based devices, e.g. Intel GPUs, using JIT
|
||||
|
|
|
@ -23,7 +23,7 @@ KOKKOS_DEVICES ?= "OpenMP"
|
|||
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
|
||||
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
|
||||
# IBM: Power8,Power9
|
||||
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100
|
||||
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103
|
||||
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
|
||||
# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC
|
||||
KOKKOS_ARCH ?= ""
|
||||
|
@ -433,6 +433,7 @@ KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),
|
|||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100)
|
||||
endif
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1103)
|
||||
|
||||
# Any AVX?
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
|
||||
|
@ -1118,6 +1119,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 1)
|
|||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
|
||||
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103), 1)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1103")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
|
||||
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1103
|
||||
endif
|
||||
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
|
||||
|
|
|
@ -116,6 +116,7 @@
|
|||
#cmakedefine KOKKOS_ARCH_AMD_GFX942
|
||||
#cmakedefine KOKKOS_ARCH_AMD_GFX1030
|
||||
#cmakedefine KOKKOS_ARCH_AMD_GFX1100
|
||||
#cmakedefine KOKKOS_ARCH_AMD_GFX1103
|
||||
#cmakedefine KOKKOS_ARCH_AMD_GPU
|
||||
#cmakedefine KOKKOS_ARCH_VEGA // deprecated
|
||||
#cmakedefine KOKKOS_ARCH_VEGA906 // deprecated
|
||||
|
|
|
@ -101,9 +101,9 @@ LIST(APPEND CORRESPONDING_AMD_FLAGS gfx90a gfx90a gfx908 gfx908)
|
|||
LIST(APPEND SUPPORTED_AMD_GPUS MI50/60 MI50/60)
|
||||
LIST(APPEND SUPPORTED_AMD_ARCHS VEGA906 AMD_GFX906)
|
||||
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx906 gfx906)
|
||||
LIST(APPEND SUPPORTED_AMD_GPUS RX7900XTX RX7900XTX V620/W6800 V620/W6800)
|
||||
LIST(APPEND SUPPORTED_AMD_ARCHS NAVI1100 AMD_GFX1100 NAVI1030 AMD_GFX1030)
|
||||
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx1100 gfx1100 gfx1030 gfx1030)
|
||||
LIST(APPEND SUPPORTED_AMD_GPUS PHOENIX RX7900XTX V620/W6800 V620/W6800)
|
||||
LIST(APPEND SUPPORTED_AMD_ARCHS AMD_GFX1103 AMD_GFX1100 NAVI1030 AMD_GFX1030)
|
||||
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx1103 gfx1100 gfx1030 gfx1030)
|
||||
|
||||
#FIXME CAN BE REPLACED WITH LIST_ZIP IN CMAKE 3.17
|
||||
FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS)
|
||||
|
|
|
@ -35,7 +35,8 @@ struct HIPTraits {
|
|||
static constexpr int WarpSize = 64;
|
||||
static constexpr int WarpIndexMask = 0x003f; /* hexadecimal for 63 */
|
||||
static constexpr int WarpIndexShift = 6; /* WarpSize == 1 << WarpShift*/
|
||||
#elif defined(KOKKOS_ARCH_AMD_GFX1030) || defined(KOKKOS_ARCH_AMD_GFX1100)
|
||||
#elif defined(KOKKOS_ARCH_AMD_GFX1030) || defined(KOKKOS_ARCH_AMD_GFX1100) || \
|
||||
defined(KOKKOS_ARCH_AMD_GFX1103)
|
||||
static constexpr int WarpSize = 32;
|
||||
static constexpr int WarpIndexMask = 0x001f; /* hexadecimal for 31 */
|
||||
static constexpr int WarpIndexShift = 5; /* WarpSize == 1 << WarpShift*/
|
||||
|
|
|
@ -143,7 +143,7 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>,
|
|||
local_offset_value = element_values(team_id, i - 1);
|
||||
// FIXME_OPENMPTARGET We seem to access memory illegally on AMD GPUs
|
||||
#if defined(KOKKOS_ARCH_AMD_GPU) && !defined(KOKKOS_ARCH_AMD_GFX1030) && \
|
||||
!defined(KOKKOS_ARCH_AMD_GFX1100)
|
||||
!defined(KOKKOS_ARCH_AMD_GFX1100) && !defined(KOKKOS_ARCH_AMD_GFX1103)
|
||||
if constexpr (Analysis::Reducer::has_join_member_function()) {
|
||||
if constexpr (std::is_void_v<WorkTag>)
|
||||
a_functor_reducer.get_functor().join(local_offset_value,
|
||||
|
|
|
@ -750,6 +750,9 @@ void pre_initialize_internal(const Kokkos::InitializationSettings& settings) {
|
|||
#elif defined(KOKKOS_ARCH_AMD_GFX1100)
|
||||
declare_configuration_metadata("architecture", "GPU architecture",
|
||||
"AMD_GFX1100");
|
||||
#elif defined(KOKKOS_ARCH_AMD_GFX1103)
|
||||
declare_configuration_metadata("architecture", "GPU architecture",
|
||||
"AMD_GFX1103");
|
||||
|
||||
#else
|
||||
declare_configuration_metadata("architecture", "GPU architecture", "none");
|
||||
|
|
|
@ -164,6 +164,7 @@ display_help_text() {
|
|||
echo " AMD_GFX942 = AMD GPU MI300 GFX942"
|
||||
echo " AMD_GFX1030 = AMD GPU V620/W6800 GFX1030"
|
||||
echo " AMD_GFX1100 = AMD GPU RX 7900 XT(X) GFX1100"
|
||||
echo " AMD_GFX1103 = AMD APU Radeon 740M/760M/780M/880M/890M GFX1103"
|
||||
echo " [ARM]"
|
||||
echo " ARMV80 = ARMv8.0 Compatible CPU"
|
||||
echo " ARMV81 = ARMv8.1 Compatible CPU"
|
||||
|
|
Loading…
Reference in New Issue