From 35d782feae7f0b817016315d8718a82c61968894 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Fri, 15 Jul 2016 15:57:13 +0800 Subject: [PATCH] drm/amdgpu: add amdgpu soft reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check gpu status first, if MC/VMC/DISPLAY hang, directly triger full reset. If engine hangs, then triger engine soft reset, if soft reset fails, will fallback to full reset. Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 104 +++++++++++++++++---- drivers/gpu/drm/amd/include/amd_shared.h | 2 + 2 files changed, 88 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b88620586c8e..2bd2b19d4666 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1962,7 +1962,8 @@ int amdgpu_pre_soft_reset(struct amdgpu_device *adev) for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_block_status[i].valid) continue; - if (adev->ip_blocks[i].funcs->pre_soft_reset) { + if (adev->ip_block_status[i].hang && + adev->ip_blocks[i].funcs->pre_soft_reset) { r = adev->ip_blocks[i].funcs->pre_soft_reset(adev); if (r) return r; @@ -1972,6 +1973,58 @@ int amdgpu_pre_soft_reset(struct amdgpu_device *adev) return 0; } +static bool amdgpu_need_full_reset(struct amdgpu_device *adev) +{ + if (adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_SMC].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_ACP].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_DCE].hang) { + DRM_INFO("Some block need full reset!\n"); + return true; + } + return false; +} + +static int amdgpu_soft_reset(struct amdgpu_device *adev) +{ + int i, r = 0; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_block_status[i].valid) + continue; + if (adev->ip_block_status[i].hang && + adev->ip_blocks[i].funcs->soft_reset) { + r = adev->ip_blocks[i].funcs->soft_reset(adev); + if (r) + return r; + } + } + + return 0; +} + +static int amdgpu_post_soft_reset(struct amdgpu_device *adev) +{ + int i, r = 0; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_block_status[i].valid) + continue; + if (adev->ip_block_status[i].hang && + adev->ip_blocks[i].funcs->post_soft_reset) + r = adev->ip_blocks[i].funcs->post_soft_reset(adev); + if (r) + return r; + } + + return 0; +} + /** * amdgpu_gpu_reset - reset the asic * @@ -1984,6 +2037,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) { int i, r; int resched; + bool need_full_reset; if (!amdgpu_check_soft_reset(adev)) { DRM_INFO("No hardware hang detected. Did some blocks stall?\n"); @@ -2007,28 +2061,42 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ amdgpu_fence_driver_force_completion(adev); - /* save scratch */ - amdgpu_atombios_scratch_regs_save(adev); - r = amdgpu_suspend(adev); + need_full_reset = amdgpu_need_full_reset(adev); + + if (!need_full_reset) { + amdgpu_pre_soft_reset(adev); + r = amdgpu_soft_reset(adev); + amdgpu_post_soft_reset(adev); + if (r || amdgpu_check_soft_reset(adev)) { + DRM_INFO("soft reset failed, will fallback to full reset!\n"); + need_full_reset = true; + } + } + + if (need_full_reset) { + /* save scratch */ + amdgpu_atombios_scratch_regs_save(adev); + r = amdgpu_suspend(adev); retry: - /* Disable fb access */ - if (adev->mode_info.num_crtc) { - struct amdgpu_mode_mc_save save; - amdgpu_display_stop_mc_access(adev, &save); - amdgpu_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC); - } + /* Disable fb access */ + if (adev->mode_info.num_crtc) { + struct amdgpu_mode_mc_save save; + amdgpu_display_stop_mc_access(adev, &save); + amdgpu_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC); + } - r = amdgpu_asic_reset(adev); - /* post card */ - amdgpu_atom_asic_init(adev->mode_info.atom_context); + r = amdgpu_asic_reset(adev); + /* post card */ + amdgpu_atom_asic_init(adev->mode_info.atom_context); - if (!r) { - dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); - r = amdgpu_resume(adev); + if (!r) { + dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); + r = amdgpu_resume(adev); + } + /* restore scratch */ + amdgpu_atombios_scratch_regs_restore(adev); } - /* restore scratch */ - amdgpu_atombios_scratch_regs_restore(adev); if (!r) { r = amdgpu_ib_ring_tests(adev); if (r) { diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 2a2a5aa39b99..db710418f35f 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -165,6 +165,8 @@ struct amd_ip_funcs { int (*pre_soft_reset)(void *handle); /* soft reset the IP block */ int (*soft_reset)(void *handle); + /* post soft reset the IP block */ + int (*post_soft_reset)(void *handle); /* enable/disable cg for the IP block */ int (*set_clockgating_state)(void *handle, enum amd_clockgating_state state);