drm/amdkfd: Add available memory ioctl
Add a new KFD ioctl to return the largest possible memory size that can be allocated as a buffer object using kfd_ioctl_alloc_memory_of_gpu. It attempts to use exactly the same accept/reject criteria as that function so that allocating a new buffer object of the size returned by this new ioctl is guaranteed to succeed, barring races with other allocating tasks.

This IOCTL will be used by libhsakmt: https://www.mail-archive.com/amd-gfx@lists.freedesktop.org/msg75743.html

Signed-off-by: Daniel Phillips <Daniel.Phillips@amd.com>
Signed-off-by: David Yat Sin <David.YatSin@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
1a65327a84
commit
9731dd4cad
|
@ -268,6 +268,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
|
|||
void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
|
||||
void *drm_priv);
|
||||
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
|
||||
size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev);
|
||||
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||
struct amdgpu_device *adev, uint64_t va, uint64_t size,
|
||||
void *drm_priv, struct kgd_mem **mem,
|
||||
|
|
|
@ -38,6 +38,12 @@
|
|||
*/
|
||||
#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
|
||||
|
||||
/*
|
||||
* Align VRAM allocations to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
|
||||
* BO chunk
|
||||
*/
|
||||
#define VRAM_ALLOCATION_ALIGN (1 << 21)
|
||||
|
||||
/* Impose limit on how much memory KFD can use */
|
||||
static struct {
|
||||
uint64_t max_system_mem_limit;
|
||||
|
@ -108,7 +114,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
|
|||
* compromise that should work in most cases without reserving too
|
||||
* much memory for page tables unnecessarily (factor 16K, >> 14).
|
||||
*/
|
||||
#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
|
||||
#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM)
|
||||
|
||||
static size_t amdgpu_amdkfd_acc_size(uint64_t size)
|
||||
{
|
||||
|
@ -148,7 +154,13 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
|
|||
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
|
||||
system_mem_needed = acc_size;
|
||||
ttm_mem_needed = acc_size;
|
||||
vram_needed = size;
|
||||
|
||||
/*
|
||||
* Conservatively round up the allocation requirement to 2 MB
|
||||
* to avoid fragmentation caused by 4K allocations in the tail
|
||||
* 2M BO chunk.
|
||||
*/
|
||||
vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN);
|
||||
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
|
||||
system_mem_needed = acc_size + size;
|
||||
ttm_mem_needed = acc_size;
|
||||
|
@ -173,7 +185,9 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
|
|||
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
|
||||
kfd_mem_limit.max_ttm_mem_limit) ||
|
||||
(adev->kfd.vram_used + vram_needed >
|
||||
adev->gmc.real_vram_size - reserved_for_pt)) {
|
||||
adev->gmc.real_vram_size -
|
||||
atomic64_read(&adev->vram_pin_size) -
|
||||
reserved_for_pt)) {
|
||||
ret = -ENOMEM;
|
||||
goto release;
|
||||
}
|
||||
|
@ -205,7 +219,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
|
|||
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
|
||||
kfd_mem_limit.system_mem_used -= acc_size;
|
||||
kfd_mem_limit.ttm_mem_used -= acc_size;
|
||||
adev->kfd.vram_used -= size;
|
||||
adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN);
|
||||
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
|
||||
kfd_mem_limit.system_mem_used -= (acc_size + size);
|
||||
kfd_mem_limit.ttm_mem_used -= acc_size;
|
||||
|
@ -1633,6 +1647,22 @@ out_unlock:
|
|||
return ret;
|
||||
}
|
||||
|
||||
size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
|
||||
{
|
||||
uint64_t reserved_for_pt =
|
||||
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
|
||||
size_t available;
|
||||
|
||||
spin_lock(&kfd_mem_limit.mem_limit_lock);
|
||||
available = adev->gmc.real_vram_size
|
||||
- adev->kfd.vram_used
|
||||
- atomic64_read(&adev->vram_pin_size)
|
||||
- reserved_for_pt;
|
||||
spin_unlock(&kfd_mem_limit.mem_limit_lock);
|
||||
|
||||
return ALIGN_DOWN(available, VRAM_ALLOCATION_ALIGN);
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||
struct amdgpu_device *adev, uint64_t va, uint64_t size,
|
||||
void *drm_priv, struct kgd_mem **mem,
|
||||
|
|
|
@ -65,6 +65,25 @@ static int kfd_char_dev_major = -1;
|
|||
static struct class *kfd_class;
|
||||
struct device *kfd_device;
|
||||
|
||||
/*
 * Lock p->mutex and look up the per-device data matching gpu_id.
 *
 * When a pdd is found it is returned with p->mutex still held; the caller is
 * responsible for dropping it (the ioctl handlers in this file pair this
 * with kfd_unlock_pdd()). When no pdd matches, the mutex is released here
 * and NULL is returned.
 */
static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id)
{
	struct kfd_process_device *found;

	mutex_lock(&p->mutex);
	found = kfd_process_device_data_by_id(p, gpu_id);

	/* Only keep the lock when there is a pdd to hand back. */
	if (!found)
		mutex_unlock(&p->mutex);

	return found;
}
|
||||
|
||||
static inline void kfd_unlock_pdd(struct kfd_process_device *pdd)
|
||||
{
|
||||
mutex_unlock(&pdd->process->mutex);
|
||||
}
|
||||
|
||||
int kfd_chardev_init(void)
|
||||
{
|
||||
int err = 0;
|
||||
|
@ -958,6 +977,19 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev)
|
|||
return false;
|
||||
}
|
||||
|
||||
static int kfd_ioctl_get_available_memory(struct file *filep,
|
||||
struct kfd_process *p, void *data)
|
||||
{
|
||||
struct kfd_ioctl_get_available_memory_args *args = data;
|
||||
struct kfd_process_device *pdd = kfd_lock_pdd_by_id(p, args->gpu_id);
|
||||
|
||||
if (!pdd)
|
||||
return -EINVAL;
|
||||
args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev);
|
||||
kfd_unlock_pdd(pdd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
|
||||
struct kfd_process *p, void *data)
|
||||
{
|
||||
|
@ -2648,6 +2680,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
|
|||
AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP,
|
||||
kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE),
|
||||
|
||||
AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY,
|
||||
kfd_ioctl_get_available_memory, 0),
|
||||
};
|
||||
|
||||
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
|
||||
|
|
|
@ -34,9 +34,10 @@
|
|||
* - 1.6 - Query clear flags in SVM get_attr API
|
||||
* - 1.7 - Checkpoint Restore (CRIU) API
|
||||
* - 1.8 - CRIU - Support for SDMA transfers with GTT BOs
|
||||
* - 1.9 - Add available memory ioctl
|
||||
*/
|
||||
#define KFD_IOCTL_MAJOR_VERSION 1
|
||||
#define KFD_IOCTL_MINOR_VERSION 8
|
||||
#define KFD_IOCTL_MINOR_VERSION 9
|
||||
|
||||
struct kfd_ioctl_get_version_args {
|
||||
__u32 major_version; /* from KFD */
|
||||
|
@ -100,6 +101,12 @@ struct kfd_ioctl_get_queue_wave_state_args {
|
|||
__u32 pad;
|
||||
};
|
||||
|
||||
struct kfd_ioctl_get_available_memory_args {
|
||||
__u64 available; /* from KFD */
|
||||
__u32 gpu_id; /* to KFD */
|
||||
__u32 pad;
|
||||
};
|
||||
|
||||
/* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
|
||||
#define KFD_IOC_CACHE_POLICY_COHERENT 0
|
||||
#define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
|
||||
|
@ -826,7 +833,10 @@ struct kfd_ioctl_set_xnack_mode_args {
|
|||
#define AMDKFD_IOC_CRIU_OP \
|
||||
AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_args)
|
||||
|
||||
#define AMDKFD_IOC_AVAILABLE_MEMORY \
|
||||
AMDKFD_IOWR(0x23, struct kfd_ioctl_get_available_memory_args)
|
||||
|
||||
#define AMDKFD_COMMAND_START 0x01
|
||||
#define AMDKFD_COMMAND_END 0x23
|
||||
#define AMDKFD_COMMAND_END 0x24
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue