drm/amdgpu: Add KFD eviction fence
This fence is used by KFD to keep memory resident while user mode queues are enabled. Trying to evict memory will trigger the enable_signaling callback, which starts a KFD eviction, which involves preempting user mode queues before signaling the fence. There is one such fence per process. v2: * Grab a reference to mm_struct * Dereference fence after NULL check * Simplify fence release, no need to signal without anyone waiting * Added signed-off-by Harish, who is the original author of this code v3: * update MAINTAINERS file * change amd_kfd_ prefix to amdkfd_ * remove useless initialization of variable to NULL v4: * set amdkfd_fence_ops to be static * Suggested by: Fengguang Wu <fengguang.wu@intel.com> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
This commit is contained in:
parent
1029a3f336
commit
d8d019ccff
|
@ -766,6 +766,7 @@ F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
|
|||
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
|
||||
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
|
||||
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
|
||||
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
|
||||
F: drivers/gpu/drm/amd/amdkfd/
|
||||
F: drivers/gpu/drm/amd/include/cik_structs.h
|
||||
F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h
|
||||
|
|
|
@ -129,6 +129,7 @@ amdgpu-y += \
|
|||
# add amdkfd interfaces
|
||||
amdgpu-y += \
|
||||
amdgpu_amdkfd.o \
|
||||
amdgpu_amdkfd_fence.o \
|
||||
amdgpu_amdkfd_gfx_v8.o
|
||||
|
||||
# add cgs
|
||||
|
|
|
@ -29,6 +29,8 @@
|
|||
#include <linux/mmu_context.h>
|
||||
#include <kgd_kfd_interface.h>
|
||||
|
||||
extern const struct kgd2kfd_calls *kgd2kfd;
|
||||
|
||||
struct amdgpu_device;
|
||||
|
||||
struct kgd_mem {
|
||||
|
@ -37,6 +39,19 @@ struct kgd_mem {
|
|||
void *cpu_ptr;
|
||||
};
|
||||
|
||||
/* KFD Memory Eviction */
|
||||
struct amdgpu_amdkfd_fence {
|
||||
struct dma_fence base;
|
||||
struct mm_struct *mm;
|
||||
spinlock_t lock;
|
||||
char timeline_name[TASK_COMM_LEN];
|
||||
};
|
||||
|
||||
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
|
||||
struct mm_struct *mm);
|
||||
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
|
||||
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
|
||||
|
||||
int amdgpu_amdkfd_init(void);
|
||||
void amdgpu_amdkfd_fini(void);
|
||||
|
||||
|
|
|
@ -0,0 +1,179 @@
|
|||
/*
|
||||
* Copyright 2016-2018 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <linux/dma-fence.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/stacktrace.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include "amdgpu_amdkfd.h"
|
||||
|
||||
static const struct dma_fence_ops amdkfd_fence_ops;
|
||||
static atomic_t fence_seq = ATOMIC_INIT(0);
|
||||
|
||||
/* Eviction Fence
|
||||
* Fence helper functions to deal with KFD memory eviction.
|
||||
* Big Idea - Since KFD submissions are done by user queues, a BO cannot be
|
||||
* evicted unless all the user queues for that process are evicted.
|
||||
*
|
||||
* All the BOs in a process share an eviction fence. When process X wants
|
||||
* to map VRAM memory but TTM can't find enough space, TTM will attempt to
|
||||
* evict BOs from its LRU list. TTM checks if the BO is valuable to evict
|
||||
* by calling ttm_bo_driver->eviction_valuable().
|
||||
*
|
||||
* ttm_bo_driver->eviction_valuable() - will return false if the BO belongs
|
||||
* to process X. Otherwise, it will return true to indicate BO can be
|
||||
* evicted by TTM.
|
||||
*
|
||||
* If ttm_bo_driver->eviction_valuable returns true, then TTM will continue
|
||||
 * the eviction process for that BO by calling ttm_bo_evict --> amdgpu_bo_move
|
||||
* --> amdgpu_copy_buffer(). This sets up job in GPU scheduler.
|
||||
*
|
||||
* GPU Scheduler (amd_sched_main) - sets up a cb (fence_add_callback) to
|
||||
 * notify when the BO is free to move. fence_add_callback --> enable_signaling
|
||||
* --> amdgpu_amdkfd_fence.enable_signaling
|
||||
*
|
||||
* amdgpu_amdkfd_fence.enable_signaling - Start a work item that will quiesce
|
||||
* user queues and signal fence. The work item will also start another delayed
|
||||
* work item to restore BOs
|
||||
*/
|
||||
|
||||
/**
 * amdgpu_amdkfd_fence_create - Allocate a per-process KFD eviction fence
 *
 * @context: fence context for the new fence
 * @mm: mm_struct of the owning KFD process
 *
 * Grabs a reference on @mm (released in amdkfd_fence_release) so the
 * mm_struct stays valid for the lifetime of the fence.  Returns the new
 * fence, or NULL on allocation failure.
 */
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
				       struct mm_struct *mm)
{
	struct amdgpu_amdkfd_fence *fence = kzalloc(sizeof(*fence), GFP_KERNEL);

	if (!fence)
		return NULL;

	spin_lock_init(&fence->lock);
	get_task_comm(fence->timeline_name, current);

	/* Paired with the mmdrop() in amdkfd_fence_release */
	mmgrab(mm);
	fence->mm = mm;

	dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock,
		   context, atomic_inc_return(&fence_seq));

	return fence;
}
|
||||
|
||||
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
|
||||
{
|
||||
struct amdgpu_amdkfd_fence *fence;
|
||||
|
||||
if (!f)
|
||||
return NULL;
|
||||
|
||||
fence = container_of(f, struct amdgpu_amdkfd_fence, base);
|
||||
if (fence && f->ops == &amdkfd_fence_ops)
|
||||
return fence;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* dma_fence_ops.get_driver_name callback: constant driver name shown in
 * fence debug output and traces.
 */
static const char *amdkfd_fence_get_driver_name(struct dma_fence *f)
{
	return "amdgpu_amdkfd_fence";
}
|
||||
|
||||
/* dma_fence_ops.get_timeline_name callback: per-process timeline name,
 * captured from the task's comm at fence creation time.
 *
 * NOTE(review): to_amdgpu_amdkfd_fence() returns NULL for a foreign fence,
 * which would make the dereference below crash.  Presumably this callback
 * is only ever invoked through amdkfd_fence_ops, i.e. on our own fences —
 * confirm before reusing elsewhere.
 */
static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f)
{
	struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);

	return fence->timeline_name;
}
|
||||
|
||||
/**
|
||||
* amdkfd_fence_enable_signaling - This gets called when TTM wants to evict
|
||||
* a KFD BO and schedules a job to move the BO.
|
||||
* If fence is already signaled return true.
|
||||
* If fence is not signaled schedule a evict KFD process work item.
|
||||
*/
|
||||
static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
|
||||
{
|
||||
struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
|
||||
|
||||
if (!fence)
|
||||
return false;
|
||||
|
||||
if (dma_fence_is_signaled(f))
|
||||
return true;
|
||||
|
||||
if (!kgd2kfd->schedule_evict_and_restore_process(fence->mm, f))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * amdkfd_fence_release - callback that fence can be freed
 *
 * @f: fence (the dma_fence embedded in an amdgpu_amdkfd_fence)
 *
 * This function is called when the reference count becomes zero.
 * Drops the mm_struct reference taken in amdgpu_amdkfd_fence_create()
 * and RCU-schedules freeing of the fence.  There is no need to signal
 * here: with a refcount of zero, nobody can be waiting on this fence.
 */
static void amdkfd_fence_release(struct dma_fence *f)
{
	struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);

	/* Only fences created through amdgpu_amdkfd_fence_create() should
	 * ever reach this callback.
	 */
	if (WARN_ON(!fence))
		return; /* Not an amdgpu_amdkfd_fence */

	/* Pairs with the mmgrab() in amdgpu_amdkfd_fence_create() */
	mmdrop(fence->mm);
	kfree_rcu(f, rcu);
}
|
||||
|
||||
/**
|
||||
* amdkfd_fence_check_mm - Check if @mm is same as that of the fence @f
|
||||
* if same return TRUE else return FALSE.
|
||||
*
|
||||
* @f: [IN] fence
|
||||
* @mm: [IN] mm that needs to be verified
|
||||
*/
|
||||
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
|
||||
{
|
||||
struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
|
||||
|
||||
if (!fence)
|
||||
return false;
|
||||
else if (fence->mm == mm)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static const struct dma_fence_ops amdkfd_fence_ops = {
|
||||
.get_driver_name = amdkfd_fence_get_driver_name,
|
||||
.get_timeline_name = amdkfd_fence_get_timeline_name,
|
||||
.enable_signaling = amdkfd_fence_enable_signaling,
|
||||
.signaled = NULL,
|
||||
.wait = dma_fence_default_wait,
|
||||
.release = amdkfd_fence_release,
|
||||
};
|
|
@ -36,8 +36,9 @@
|
|||
#define AMDGPU_MAX_UVD_ENC_RINGS 2
|
||||
|
||||
/* some special values for the owner field */
|
||||
#define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul)
|
||||
#define AMDGPU_FENCE_OWNER_VM ((void*)1ul)
|
||||
#define AMDGPU_FENCE_OWNER_UNDEFINED ((void *)0ul)
|
||||
#define AMDGPU_FENCE_OWNER_VM ((void *)1ul)
|
||||
#define AMDGPU_FENCE_OWNER_KFD ((void *)2ul)
|
||||
|
||||
#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
|
||||
#define AMDGPU_FENCE_FLAG_INT (1 << 1)
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include <drm/drmP.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_trace.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
|
||||
struct amdgpu_sync_entry {
|
||||
struct hlist_node node;
|
||||
|
@ -85,11 +86,20 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
|
|||
*/
|
||||
static void *amdgpu_sync_get_owner(struct dma_fence *f)
|
||||
{
|
||||
struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
|
||||
struct drm_sched_fence *s_fence;
|
||||
struct amdgpu_amdkfd_fence *kfd_fence;
|
||||
|
||||
if (!f)
|
||||
return AMDGPU_FENCE_OWNER_UNDEFINED;
|
||||
|
||||
s_fence = to_drm_sched_fence(f);
|
||||
if (s_fence)
|
||||
return s_fence->owner;
|
||||
|
||||
kfd_fence = to_amdgpu_amdkfd_fence(f);
|
||||
if (kfd_fence)
|
||||
return AMDGPU_FENCE_OWNER_KFD;
|
||||
|
||||
return AMDGPU_FENCE_OWNER_UNDEFINED;
|
||||
}
|
||||
|
||||
|
@ -204,11 +214,18 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
|
|||
for (i = 0; i < flist->shared_count; ++i) {
|
||||
f = rcu_dereference_protected(flist->shared[i],
|
||||
reservation_object_held(resv));
|
||||
/* We only want to trigger KFD eviction fences on
|
||||
* evict or move jobs. Skip KFD fences otherwise.
|
||||
*/
|
||||
fence_owner = amdgpu_sync_get_owner(f);
|
||||
if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
|
||||
owner != AMDGPU_FENCE_OWNER_UNDEFINED)
|
||||
continue;
|
||||
|
||||
if (amdgpu_sync_same_dev(adev, f)) {
|
||||
/* VM updates are only interesting
|
||||
* for other VM updates and moves.
|
||||
*/
|
||||
fence_owner = amdgpu_sync_get_owner(f);
|
||||
if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
|
||||
(fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
|
||||
((owner == AMDGPU_FENCE_OWNER_VM) !=
|
||||
|
|
|
@ -46,6 +46,7 @@
|
|||
#include "amdgpu.h"
|
||||
#include "amdgpu_object.h"
|
||||
#include "amdgpu_trace.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
#include "bif/bif_4_1_d.h"
|
||||
|
||||
#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
|
||||
|
@ -1171,6 +1172,23 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
|
|||
{
|
||||
unsigned long num_pages = bo->mem.num_pages;
|
||||
struct drm_mm_node *node = bo->mem.mm_node;
|
||||
struct reservation_object_list *flist;
|
||||
struct dma_fence *f;
|
||||
int i;
|
||||
|
||||
/* If bo is a KFD BO, check if the bo belongs to the current process.
|
||||
* If true, then return false as any KFD process needs all its BOs to
|
||||
* be resident to run successfully
|
||||
*/
|
||||
flist = reservation_object_get_list(bo->resv);
|
||||
if (flist) {
|
||||
for (i = 0; i < flist->shared_count; ++i) {
|
||||
f = rcu_dereference_protected(flist->shared[i],
|
||||
reservation_object_held(bo->resv));
|
||||
if (amdkfd_fence_check_mm(f, current->mm))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
switch (bo->mem.mem_type) {
|
||||
case TTM_PL_TT:
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
|
||||
#include <linux/types.h>
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/dma-fence.h>
|
||||
|
||||
struct pci_dev;
|
||||
|
||||
|
@ -286,6 +287,9 @@ struct kfd2kgd_calls {
|
|||
*
|
||||
* @resume: Notifies amdkfd about a resume action done to a kgd device
|
||||
*
|
||||
* @schedule_evict_and_restore_process: Schedules work queue that will prepare
|
||||
* for safe eviction of KFD BOs that belong to the specified process.
|
||||
*
|
||||
* This structure contains function callback pointers so the kgd driver
|
||||
* will notify to the amdkfd about certain status changes.
|
||||
*
|
||||
|
@ -300,6 +304,8 @@ struct kgd2kfd_calls {
|
|||
void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry);
|
||||
void (*suspend)(struct kfd_dev *kfd);
|
||||
int (*resume)(struct kfd_dev *kfd);
|
||||
int (*schedule_evict_and_restore_process)(struct mm_struct *mm,
|
||||
struct dma_fence *fence);
|
||||
};
|
||||
|
||||
int kgd2kfd_init(unsigned interface_version,
|
||||
|
|
Loading…
Reference in New Issue