drm/amdgpu: Fix some unload driver issues

When unloading driver after killing some applications, it will hit sdma
flush tlb job timeout which is called by ttm_bo_delay_delete. So
to avoid the job submit after fence driver fini, call ttm_bo_lock_delayed_workqueue
before fence driver fini. And also put drm_sched_fini before waiting fence.

Signed-off-by: Emily Deng <Emily.Deng@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Emily Deng 2021-03-04 19:30:51 +08:00 committed by Alex Deucher
parent 3c73683c23
commit bb0cd09be4
2 changed files with 4 additions and 2 deletions

View File

@ -3598,6 +3598,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
{
dev_info(adev->dev, "amdgpu: finishing device.\n");
flush_delayed_work(&adev->delayed_init_work);
ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
adev->shutdown = true;
kfree(adev->pci_state);

View File

@ -531,6 +531,8 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
if (!ring || !ring->fence_drv.initialized)
continue;
if (!ring->no_scheduler)
drm_sched_fini(&ring->sched);
r = amdgpu_fence_wait_empty(ring);
if (r) {
/* no need to trigger GPU reset as we are unloading */
@ -539,8 +541,7 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
if (ring->fence_drv.irq_src)
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
if (!ring->no_scheduler)
drm_sched_fini(&ring->sched);
del_timer_sync(&ring->fence_drv.fallback_timer);
for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
dma_fence_put(ring->fence_drv.fences[j]);