drm/amdgpu: add timer to fence to detect scheduler lockup
Change-Id: I67e987db0efdca28faa80b332b75571192130d33
Signed-off-by: Junwei Zhang <Jerry.Zhang@amd.com>
Reviewed-by: David Zhou <david1.zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
parent d6c10f6b81
commit 2440ff2c91
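What the patch does, in short: each job's scheduler fence gets a delayed work item armed with the lockup timeout when the job is handed to the hardware; amd_sched_process_job() cancels it when the fence signals, and if it fires first, amd_sched_fence_work_func() reports the lockup and cleans out the pending fences. Below is a rough userspace analogue of that arm-on-submit / disarm-on-signal pattern, using POSIX timers in place of the kernel's delayed workqueues (job_watchdog, job_submit and job_complete are made-up names for illustration, not anything in the patch):

#include <signal.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

struct job_watchdog {
	timer_t timer;
	int job_id;
};

static void watchdog_fired(union sigval sv)
{
	struct job_watchdog *wd = sv.sival_ptr;

	fprintf(stderr, "job %d: fence not signalled in time, lockup suspected\n",
		wd->job_id);
}

static void job_submit(struct job_watchdog *wd, int job_id, long timeout_ms)
{
	struct sigevent sev = {
		.sigev_notify = SIGEV_THREAD,
		.sigev_notify_function = watchdog_fired,
		.sigev_value.sival_ptr = wd,
	};
	struct itimerspec its = {
		.it_value = {
			.tv_sec = timeout_ms / 1000,
			.tv_nsec = (timeout_ms % 1000) * 1000000L,
		},
	};

	wd->job_id = job_id;
	/* Arm the watchdog as the job goes to the hardware
	 * (the patch uses schedule_delayed_work() here). */
	timer_create(CLOCK_MONOTONIC, &sev, &wd->timer);
	timer_settime(wd->timer, 0, &its, NULL);
}

static void job_complete(struct job_watchdog *wd)
{
	/* Fence signalled in time: disarm
	 * (cancel_delayed_work_sync() in the patch). */
	timer_delete(wd->timer);
}

int main(void)
{
	struct job_watchdog wd;

	job_submit(&wd, 1, 100);
	/* usleep(200000); */	/* uncomment to let the watchdog fire */
	job_complete(&wd);
	return 0;
}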
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -628,8 +628,20 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
 	init_waitqueue_head(&ring->fence_drv.fence_queue);
 
 	if (amdgpu_enable_scheduler) {
+		long timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
+		if (timeout == 0) {
+			/*
+			 * FIXME:
+			 * Delayed workqueue cannot use it directly,
+			 * so the scheduler will not use delayed workqueue if
+			 * MAX_SCHEDULE_TIMEOUT is set.
+			 * Currently keep it simple and silly.
+			 */
+			timeout = MAX_SCHEDULE_TIMEOUT;
+		}
 		r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
-				   amdgpu_sched_hw_submission, ring->name);
+				   amdgpu_sched_hw_submission,
+				   timeout, ring->name);
 		if (r) {
 			DRM_ERROR("Failed to create scheduler on ring %s.\n",
 				  ring->name);
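A note on the FIXME above: amdgpu_lockup_timeout == 0 means lockup detection is off, so the zero case is folded into the MAX_SCHEDULE_TIMEOUT sentinel and, as the comment says, the scheduler never touches the delayed workqueue while the sentinel is set (every watchdog site below checks sched->timeout != MAX_SCHEDULE_TIMEOUT first). A minimal sketch of the same sentinel convention in plain C (TIMEOUT_DISABLED and both function names are stand-ins, not kernel API):

#include <limits.h>
#include <stdio.h>

#define TIMEOUT_DISABLED LONG_MAX	/* plays the role of MAX_SCHEDULE_TIMEOUT */

/* Map "0 ms" (detection off) to the sentinel, as the hunk above does. */
static long lockup_timeout_to_ticks(long timeout_ms)
{
	return timeout_ms == 0 ? TIMEOUT_DISABLED : timeout_ms;
}

static void submit_job(long timeout)
{
	if (timeout != TIMEOUT_DISABLED)
		printf("arming watchdog: %ld ticks\n", timeout);
	else
		printf("lockup detection disabled\n");
	/* ...hand the job to the hardware ring... */
}

int main(void)
{
	submit_job(lockup_timeout_to_ticks(0));		/* disabled */
	submit_job(lockup_timeout_to_ticks(100));	/* armed */
	return 0;
}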
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -327,19 +327,49 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
 	struct amd_sched_fence *s_fence =
 		container_of(cb, struct amd_sched_fence, cb);
 	struct amd_gpu_scheduler *sched = s_fence->sched;
+	unsigned long flags;
 
 	atomic_dec(&sched->hw_rq_count);
 	amd_sched_fence_signal(s_fence);
+	if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
+		cancel_delayed_work_sync(&s_fence->dwork);
+		spin_lock_irqsave(&sched->fence_list_lock, flags);
+		list_del_init(&s_fence->list);
+		spin_unlock_irqrestore(&sched->fence_list_lock, flags);
+	}
 	fence_put(&s_fence->base);
 	wake_up_interruptible(&sched->wake_up_worker);
 }
 
+static void amd_sched_fence_work_func(struct work_struct *work)
+{
+	struct amd_sched_fence *s_fence =
+		container_of(work, struct amd_sched_fence, dwork.work);
+	struct amd_gpu_scheduler *sched = s_fence->sched;
+	struct amd_sched_fence *entity, *tmp;
+	unsigned long flags;
+
+	DRM_ERROR("[%s] scheduler is timeout!\n", sched->name);
+
+	/* Clean all pending fences */
+	list_for_each_entry_safe(entity, tmp, &sched->fence_list, list) {
+		DRM_ERROR(" fence no %d\n", entity->base.seqno);
+		cancel_delayed_work_sync(&entity->dwork);
+		spin_lock_irqsave(&sched->fence_list_lock, flags);
+		list_del_init(&entity->list);
+		spin_unlock_irqrestore(&sched->fence_list_lock, flags);
+		fence_put(&entity->base);
+	}
+}
+
 static int amd_sched_main(void *param)
 {
	struct sched_param sparam = {.sched_priority = 1};
 	struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param;
 	int r, count;
 
+	spin_lock_init(&sched->fence_list_lock);
+	INIT_LIST_HEAD(&sched->fence_list);
 	sched_setscheduler(current, SCHED_FIFO, &sparam);
 
 	while (!kthread_should_stop()) {
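The timeout handler above unlinks and drops every fence while it is walking sched->fence_list, which is why it uses list_for_each_entry_safe(): the iterator keeps a second cursor on the next entry so removing the current one is safe. The same idea with a hand-rolled list in userspace C (struct node and seqno are illustrative names only):

#include <stdio.h>
#include <stdlib.h>

struct node {
	int seqno;
	struct node *next;
};

int main(void)
{
	struct node *head = NULL;

	/* Build a few "pending fences". */
	for (int i = 3; i >= 1; i--) {
		struct node *n = malloc(sizeof(*n));

		n->seqno = i;
		n->next = head;
		head = n;
	}

	/* "Safe" walk: cache ->next before the current node is freed,
	 * which is exactly what list_for_each_entry_safe()'s tmp cursor
	 * does in the kernel macro. */
	for (struct node *cur = head, *tmp; cur; cur = tmp) {
		tmp = cur->next;
		printf("cleaning up fence %d\n", cur->seqno);
		free(cur);
	}
	return 0;
}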
@@ -347,6 +377,7 @@ static int amd_sched_main(void *param)
 		struct amd_sched_fence *s_fence;
 		struct amd_sched_job *sched_job;
 		struct fence *fence;
+		unsigned long flags;
 
 		wait_event_interruptible(sched->wake_up_worker,
 			kthread_should_stop() ||
@@ -357,6 +388,15 @@ static int amd_sched_main(void *param)
 
 		entity = sched_job->s_entity;
 		s_fence = sched_job->s_fence;
+
+		if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
+			INIT_DELAYED_WORK(&s_fence->dwork, amd_sched_fence_work_func);
+			schedule_delayed_work(&s_fence->dwork, sched->timeout);
+			spin_lock_irqsave(&sched->fence_list_lock, flags);
+			list_add_tail(&s_fence->list, &sched->fence_list);
+			spin_unlock_irqrestore(&sched->fence_list_lock, flags);
+		}
+
 		atomic_inc(&sched->hw_rq_count);
 		fence = sched->ops->run_job(sched_job);
 		if (fence) {
@@ -392,11 +432,12 @@ static int amd_sched_main(void *param)
  */
 int amd_sched_init(struct amd_gpu_scheduler *sched,
 		   struct amd_sched_backend_ops *ops,
-		   unsigned hw_submission, const char *name)
+		   unsigned hw_submission, long timeout, const char *name)
 {
 	sched->ops = ops;
 	sched->hw_submission_limit = hw_submission;
 	sched->name = name;
+	sched->timeout = timeout;
 	amd_sched_rq_init(&sched->sched_rq);
 	amd_sched_rq_init(&sched->kernel_rq);
 
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -68,6 +68,8 @@ struct amd_sched_fence {
 	struct amd_gpu_scheduler	*sched;
 	spinlock_t			lock;
 	void				*owner;
+	struct delayed_work		dwork;
+	struct list_head		list;
 };
 
 struct amd_sched_job {
@@ -103,18 +105,21 @@ struct amd_sched_backend_ops {
 struct amd_gpu_scheduler {
 	struct amd_sched_backend_ops	*ops;
 	uint32_t			hw_submission_limit;
+	long				timeout;
 	const char			*name;
 	struct amd_sched_rq		sched_rq;
 	struct amd_sched_rq		kernel_rq;
 	wait_queue_head_t		wake_up_worker;
 	wait_queue_head_t		job_scheduled;
 	atomic_t			hw_rq_count;
+	struct list_head		fence_list;
+	spinlock_t			fence_list_lock;
 	struct task_struct		*thread;
 };
 
 int amd_sched_init(struct amd_gpu_scheduler *sched,
 		   struct amd_sched_backend_ops *ops,
-		   uint32_t hw_submission, const char *name);
+		   uint32_t hw_submission, long timeout, const char *name);
 void amd_sched_fini(struct amd_gpu_scheduler *sched);
 
 int amd_sched_entity_init(struct amd_gpu_scheduler *sched,