drm/amdkfd: Fix memory reporting on GFX 9.4.3
This patch fixes memory reporting on the GFX 9.4.3 APU and dGPU by reporting available memory on a per-partition basis. If it's an APU, the available and used memory calculations also take system and TTM memory into account. v2: squash in fix ("drm/amdkfd: Fix array out of bound warning") squash in fix ("drm/amdgpu: Update memory reporting for GFX9.4.3") Signed-off-by: Mukul Joshi <mukul.joshi@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
315e29eca5
commit
1c77527a69
|
@ -35,6 +35,7 @@
|
|||
#include <drm/ttm/ttm_execbuf_util.h>
|
||||
#include "amdgpu_sync.h"
|
||||
#include "amdgpu_vm.h"
|
||||
#include "amdgpu_xcp.h"
|
||||
|
||||
extern uint64_t amdgpu_amdkfd_total_mem_size;
|
||||
|
||||
|
@ -98,8 +99,8 @@ struct amdgpu_amdkfd_fence {
|
|||
|
||||
struct amdgpu_kfd_dev {
|
||||
struct kfd_dev *dev;
|
||||
int64_t vram_used;
|
||||
uint64_t vram_used_aligned;
|
||||
int64_t vram_used[MAX_XCP];
|
||||
uint64_t vram_used_aligned[MAX_XCP];
|
||||
bool init_complete;
|
||||
struct work_struct reset_work;
|
||||
|
||||
|
@ -287,7 +288,8 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
|
|||
void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
|
||||
void *drm_priv);
|
||||
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
|
||||
size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev);
|
||||
size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
|
||||
uint8_t xcp_id);
|
||||
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||
struct amdgpu_device *adev, uint64_t va, uint64_t size,
|
||||
void *drm_priv, struct kgd_mem **mem,
|
||||
|
@ -327,9 +329,9 @@ void amdgpu_amdkfd_block_mmu_notifications(void *p);
|
|||
int amdgpu_amdkfd_criu_resume(void *p);
|
||||
bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev);
|
||||
int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
|
||||
uint64_t size, u32 alloc_flag);
|
||||
uint64_t size, u32 alloc_flag, int8_t xcp_id);
|
||||
void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
|
||||
uint64_t size, u32 alloc_flag);
|
||||
uint64_t size, u32 alloc_flag, int8_t xcp_id);
|
||||
|
||||
#define KFD_XCP_MEM_ID(adev, xcp_id) \
|
||||
((adev)->xcp_mgr && (xcp_id) >= 0 ?\
|
||||
|
|
|
@ -157,12 +157,13 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
|
|||
* Return: returns -ENOMEM in case of error, ZERO otherwise
|
||||
*/
|
||||
int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
|
||||
uint64_t size, u32 alloc_flag)
|
||||
uint64_t size, u32 alloc_flag, int8_t xcp_id)
|
||||
{
|
||||
uint64_t reserved_for_pt =
|
||||
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
|
||||
size_t system_mem_needed, ttm_mem_needed, vram_needed;
|
||||
int ret = 0;
|
||||
uint64_t vram_size = 0;
|
||||
|
||||
system_mem_needed = 0;
|
||||
ttm_mem_needed = 0;
|
||||
|
@ -177,6 +178,17 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
|
|||
* 2M BO chunk.
|
||||
*/
|
||||
vram_needed = size;
|
||||
/*
|
||||
* For GFX 9.4.3, get the VRAM size from XCP structs
|
||||
*/
|
||||
if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
|
||||
return -EINVAL;
|
||||
|
||||
vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
|
||||
if (adev->gmc.is_app_apu) {
|
||||
system_mem_needed = size;
|
||||
ttm_mem_needed = size;
|
||||
}
|
||||
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
|
||||
system_mem_needed = size;
|
||||
} else if (!(alloc_flag &
|
||||
|
@ -196,8 +208,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
|
|||
kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
|
||||
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
|
||||
kfd_mem_limit.max_ttm_mem_limit) ||
|
||||
(adev && adev->kfd.vram_used + vram_needed >
|
||||
adev->gmc.real_vram_size - reserved_for_pt)) {
|
||||
(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
|
||||
vram_size - reserved_for_pt)) {
|
||||
ret = -ENOMEM;
|
||||
goto release;
|
||||
}
|
||||
|
@ -207,9 +219,11 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
|
|||
*/
|
||||
WARN_ONCE(vram_needed && !adev,
|
||||
"adev reference can't be null when vram is used");
|
||||
if (adev) {
|
||||
adev->kfd.vram_used += vram_needed;
|
||||
adev->kfd.vram_used_aligned += ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
|
||||
if (adev && xcp_id >= 0) {
|
||||
adev->kfd.vram_used[xcp_id] += vram_needed;
|
||||
adev->kfd.vram_used_aligned[xcp_id] += adev->gmc.is_app_apu ?
|
||||
vram_needed :
|
||||
ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
|
||||
}
|
||||
kfd_mem_limit.system_mem_used += system_mem_needed;
|
||||
kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
|
||||
|
@ -220,7 +234,7 @@ release:
|
|||
}
|
||||
|
||||
void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
|
||||
uint64_t size, u32 alloc_flag)
|
||||
uint64_t size, u32 alloc_flag, int8_t xcp_id)
|
||||
{
|
||||
spin_lock(&kfd_mem_limit.mem_limit_lock);
|
||||
|
||||
|
@ -230,9 +244,19 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
|
|||
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
|
||||
WARN_ONCE(!adev,
|
||||
"adev reference can't be null when alloc mem flags vram is set");
|
||||
if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
|
||||
goto release;
|
||||
|
||||
if (adev) {
|
||||
adev->kfd.vram_used -= size;
|
||||
adev->kfd.vram_used_aligned -= ALIGN(size, VRAM_AVAILABLITY_ALIGN);
|
||||
adev->kfd.vram_used[xcp_id] -= size;
|
||||
if (adev->gmc.is_app_apu) {
|
||||
adev->kfd.vram_used_aligned[xcp_id] -= size;
|
||||
kfd_mem_limit.system_mem_used -= size;
|
||||
kfd_mem_limit.ttm_mem_used -= size;
|
||||
} else {
|
||||
adev->kfd.vram_used_aligned[xcp_id] -=
|
||||
ALIGN(size, VRAM_AVAILABLITY_ALIGN);
|
||||
}
|
||||
}
|
||||
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
|
||||
kfd_mem_limit.system_mem_used -= size;
|
||||
|
@ -242,8 +266,8 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
|
|||
pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
|
||||
goto release;
|
||||
}
|
||||
WARN_ONCE(adev && adev->kfd.vram_used < 0,
|
||||
"KFD VRAM memory accounting unbalanced");
|
||||
WARN_ONCE(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] < 0,
|
||||
"KFD VRAM memory accounting unbalanced for xcp: %d", xcp_id);
|
||||
WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
|
||||
"KFD TTM memory accounting unbalanced");
|
||||
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
|
||||
|
@ -259,7 +283,8 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
|
|||
u32 alloc_flags = bo->kfd_bo->alloc_flags;
|
||||
u64 size = amdgpu_bo_size(bo);
|
||||
|
||||
amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags);
|
||||
amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags,
|
||||
bo->xcp_id);
|
||||
|
||||
kfree(bo->kfd_bo);
|
||||
}
|
||||
|
@ -1609,23 +1634,42 @@ out_unlock:
|
|||
return ret;
|
||||
}
|
||||
|
||||
size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
|
||||
size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
|
||||
uint8_t xcp_id)
|
||||
{
|
||||
uint64_t reserved_for_pt =
|
||||
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
|
||||
ssize_t available;
|
||||
uint64_t vram_available, system_mem_available, ttm_mem_available;
|
||||
|
||||
spin_lock(&kfd_mem_limit.mem_limit_lock);
|
||||
available = adev->gmc.real_vram_size
|
||||
- adev->kfd.vram_used_aligned
|
||||
vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
|
||||
- adev->kfd.vram_used_aligned[xcp_id]
|
||||
- atomic64_read(&adev->vram_pin_size)
|
||||
- reserved_for_pt;
|
||||
|
||||
if (adev->gmc.is_app_apu) {
|
||||
system_mem_available = no_system_mem_limit ?
|
||||
kfd_mem_limit.max_system_mem_limit :
|
||||
kfd_mem_limit.max_system_mem_limit -
|
||||
kfd_mem_limit.system_mem_used;
|
||||
|
||||
ttm_mem_available = kfd_mem_limit.max_ttm_mem_limit -
|
||||
kfd_mem_limit.ttm_mem_used;
|
||||
|
||||
available = min3(system_mem_available, ttm_mem_available,
|
||||
vram_available);
|
||||
available = ALIGN_DOWN(available, PAGE_SIZE);
|
||||
} else {
|
||||
available = ALIGN_DOWN(vram_available, VRAM_AVAILABLITY_ALIGN);
|
||||
}
|
||||
|
||||
spin_unlock(&kfd_mem_limit.mem_limit_lock);
|
||||
|
||||
if (available < 0)
|
||||
available = 0;
|
||||
|
||||
return ALIGN_DOWN(available, VRAM_AVAILABLITY_ALIGN);
|
||||
return available;
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||
|
@ -1713,7 +1757,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
|||
|
||||
amdgpu_sync_create(&(*mem)->sync);
|
||||
|
||||
ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags);
|
||||
ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags,
|
||||
xcp_id);
|
||||
if (ret) {
|
||||
pr_debug("Insufficient memory\n");
|
||||
goto err_reserve_limit;
|
||||
|
@ -1781,7 +1826,7 @@ err_node_allow:
|
|||
/* Don't unreserve system mem limit twice */
|
||||
goto err_reserve_limit;
|
||||
err_bo_create:
|
||||
amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags);
|
||||
amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id);
|
||||
err_reserve_limit:
|
||||
mutex_destroy(&(*mem)->lock);
|
||||
if (gobj)
|
||||
|
|
|
@ -24,8 +24,11 @@
|
|||
#ifndef AMDGPU_XCP_H
|
||||
#define AMDGPU_XCP_H
|
||||
|
||||
#include <linux/pci.h>
|
||||
#include <linux/xarray.h>
|
||||
|
||||
#include "amdgpu_ctx.h"
|
||||
|
||||
#define MAX_XCP 8
|
||||
|
||||
#define AMDGPU_XCP_MODE_NONE -1
|
||||
|
@ -34,6 +37,8 @@
|
|||
#define AMDGPU_XCP_FL_NONE 0
|
||||
#define AMDGPU_XCP_FL_LOCKED (1 << 0)
|
||||
|
||||
struct amdgpu_fpriv;
|
||||
|
||||
enum AMDGPU_XCP_IP_BLOCK {
|
||||
AMDGPU_XCP_GFXHUB,
|
||||
AMDGPU_XCP_GFX,
|
||||
|
|
|
@ -1044,7 +1044,8 @@ static int kfd_ioctl_get_available_memory(struct file *filep,
|
|||
|
||||
if (!pdd)
|
||||
return -EINVAL;
|
||||
args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev);
|
||||
args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev,
|
||||
pdd->dev->node_id);
|
||||
kfd_unlock_pdd(pdd);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -280,7 +280,7 @@ static void svm_range_free(struct svm_range *prange, bool update_mem_usage)
|
|||
if (update_mem_usage && !p->xnack_enabled) {
|
||||
pr_debug("unreserve prange 0x%p size: 0x%llx\n", prange, size);
|
||||
amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
|
||||
}
|
||||
mutex_destroy(&prange->lock);
|
||||
mutex_destroy(&prange->migrate_mutex);
|
||||
|
@ -313,7 +313,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
|
|||
p = container_of(svms, struct kfd_process, svms);
|
||||
if (!p->xnack_enabled && update_mem_usage &&
|
||||
amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT,
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)) {
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0)) {
|
||||
pr_info("SVM mapping failed, exceeds resident system memory limit\n");
|
||||
kfree(prange);
|
||||
return NULL;
|
||||
|
@ -3037,10 +3037,10 @@ svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled)
|
|||
size = (pchild->last - pchild->start + 1) << PAGE_SHIFT;
|
||||
if (xnack_enabled) {
|
||||
amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
|
||||
} else {
|
||||
r = amdgpu_amdkfd_reserve_mem_limit(NULL, size,
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
|
||||
if (r)
|
||||
goto out_unlock;
|
||||
reserved_size += size;
|
||||
|
@ -3050,10 +3050,10 @@ svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled)
|
|||
size = (prange->last - prange->start + 1) << PAGE_SHIFT;
|
||||
if (xnack_enabled) {
|
||||
amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
|
||||
} else {
|
||||
r = amdgpu_amdkfd_reserve_mem_limit(NULL, size,
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
|
||||
if (r)
|
||||
goto out_unlock;
|
||||
reserved_size += size;
|
||||
|
@ -3066,7 +3066,7 @@ out_unlock:
|
|||
|
||||
if (r)
|
||||
amdgpu_amdkfd_unreserve_mem_limit(NULL, reserved_size,
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
|
||||
else
|
||||
/* Change xnack mode must be inside svms lock, to avoid race with
|
||||
* svm_range_deferred_list_work unreserve memory in parallel.
|
||||
|
|
Loading…
Reference in New Issue