From 1830374e1332c93f65399687b72ff55ca4d8c978 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 May 2019 13:11:07 +0100 Subject: [PATCH] drm/i915: Cancel retire_worker on parking Replace the racy continuation check within retire_work with a definite kill-switch on idling. The race was being exposed by gem_concurrent_blit where the retire_worker would be terminated too early, leaving us spinning in debugfs/i915_drop_caches with nothing flushing the retirement queue. Although the fact that the igt is trying to idle from one child while submitting from another may be a contributing factor as to why it runs so slowly... v2: Use the non-sync version of cancel_delayed_work(); we only need to stop it from being scheduled as we independently check whether now is the right time to be parking. Testcase: igt/gem_concurrent_blit Fixes: 79ffac8599c4 ("drm/i915: Invert the GEM wakeref hierarchy") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190507121108.18377-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_pm.c | 18 ++++++++++++------ .../gpu/drm/i915/selftests/mock_gem_device.c | 1 - 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c index ae91ad7cb31e..fa9c2ebd966a 100644 --- a/drivers/gpu/drm/i915/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/i915_gem_pm.c @@ -30,15 +30,23 @@ static void idle_work_handler(struct work_struct *work) { struct drm_i915_private *i915 = container_of(work, typeof(*i915), gem.idle_work); + bool restart = true; + cancel_delayed_work(&i915->gem.retire_work); mutex_lock(&i915->drm.struct_mutex); intel_wakeref_lock(&i915->gt.wakeref); - if (!intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work)) + if (!intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work)) { i915_gem_park(i915); + restart = false; + } intel_wakeref_unlock(&i915->gt.wakeref); 
mutex_unlock(&i915->drm.struct_mutex); + if (restart) + queue_delayed_work(i915->wq, + &i915->gem.retire_work, + round_jiffies_up_relative(HZ)); } static void retire_work_handler(struct work_struct *work) @@ -52,10 +60,9 @@ static void retire_work_handler(struct work_struct *work) mutex_unlock(&i915->drm.struct_mutex); } - if (intel_wakeref_active(&i915->gt.wakeref)) - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); + queue_delayed_work(i915->wq, + &i915->gem.retire_work, + round_jiffies_up_relative(HZ)); } static int pm_notifier(struct notifier_block *nb, @@ -140,7 +147,6 @@ void i915_gem_suspend(struct drm_i915_private *i915) * Assert that we successfully flushed all the work and * reset the GPU back to its idle, low power state. */ - drain_delayed_work(&i915->gem.retire_work); GEM_BUG_ON(i915->gt.awake); flush_work(&i915->gem.idle_work); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index d919f512042c..9fd02025d382 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -58,7 +58,6 @@ static void mock_device_release(struct drm_device *dev) i915_gem_contexts_lost(i915); mutex_unlock(&i915->drm.struct_mutex); - drain_delayed_work(&i915->gem.retire_work); flush_work(&i915->gem.idle_work); i915_gem_drain_workqueue(i915);