drm/amdkfd: map gpu hive id to xgmi connected cpu

ROCr needs to be able to identify all devices that have direct access to
fine grain memory, which should include CPUs that are connected to GPUs
over xGMI. The GPU hive ID can be mapped onto the CPU hive ID since the
CPU is part of the hive.

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Jonathan Kim 2021-10-14 12:34:27 -04:00 committed by Alex Deucher
parent c72942c167
commit d5edb56fbc
1 changed files with 18 additions and 1 deletions

View File

@ -1296,6 +1296,24 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
proximity_domain = atomic_inc_return(&topology_crat_proximity_domain);
adev = (struct amdgpu_device *)(gpu->kgd);
/* Include the CPU in xGMI hive if xGMI connected by assigning it the hive ID. */
if (gpu->hive_id && adev->gmc.xgmi.connected_to_cpu) {
struct kfd_topology_device *top_dev;
down_read(&topology_lock);
list_for_each_entry(top_dev, &topology_device_list, list) {
if (top_dev->gpu)
break;
top_dev->node_props.hive_id = gpu->hive_id;
}
up_read(&topology_lock);
}
/* Check to see if this gpu device exists in the topology_device_list.
* If so, assign the gpu to that device,
* else create a Virtual CRAT for this gpu device and then parse that
@ -1457,7 +1475,6 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
dev->node_props.max_waves_per_simd = 10;
}
adev = (struct amdgpu_device *)(dev->gpu->kgd);
/* kfd only concerns sram ecc on GFX and HBM ecc on UMC */
dev->node_props.capability |=
((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?