drm/i915: Restore engine->submit_request before unwedging
When we wedge the device, we override engine->submit_request with a nop
to ensure that all in-flight requests are marked in error. However, igt
would like to unwedge the device to test -EIO handling. This requires us
to flush those in-flight requests and restore the original
engine->submit_request.
v2: Use a vfunc to unify enabling request submission to engines
v3: Split new vfunc to a separate patch.
v4: Make the wait interruptible -- the third party fences we wait upon
may be indefinitely broken, so allow the reset to be aborted.
Fixes: 821ed7df6e ("drm/i915: Update reset path to fix incomplete requests")
Testcase: igt/gem_eio
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> #v3
Link: http://patchwork.freedesktop.org/patch/msgid/20170316171305.12972-3-chris@chris-wilson.co.uk
This commit is contained in:
parent
ff44ad51eb
commit
2e8f9d3229
|
@ -1821,7 +1821,9 @@ void i915_reset(struct drm_i915_private *dev_priv)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Clear any previous failed attempts at recovery. Time to try again. */
|
/* Clear any previous failed attempts at recovery. Time to try again. */
|
||||||
__clear_bit(I915_WEDGED, &error->flags);
|
if (!i915_gem_unset_wedged(dev_priv))
|
||||||
|
goto wakeup;
|
||||||
|
|
||||||
error->reset_count++;
|
error->reset_count++;
|
||||||
|
|
||||||
pr_notice("drm/i915: Resetting chip after gpu hang\n");
|
pr_notice("drm/i915: Resetting chip after gpu hang\n");
|
||||||
|
@ -1867,17 +1869,18 @@ void i915_reset(struct drm_i915_private *dev_priv)
|
||||||
|
|
||||||
i915_queue_hangcheck(dev_priv);
|
i915_queue_hangcheck(dev_priv);
|
||||||
|
|
||||||
wakeup:
|
finish:
|
||||||
i915_gem_reset_finish(dev_priv);
|
i915_gem_reset_finish(dev_priv);
|
||||||
enable_irq(dev_priv->drm.irq);
|
enable_irq(dev_priv->drm.irq);
|
||||||
|
|
||||||
|
wakeup:
|
||||||
clear_bit(I915_RESET_HANDOFF, &error->flags);
|
clear_bit(I915_RESET_HANDOFF, &error->flags);
|
||||||
wake_up_bit(&error->flags, I915_RESET_HANDOFF);
|
wake_up_bit(&error->flags, I915_RESET_HANDOFF);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
error:
|
error:
|
||||||
i915_gem_set_wedged(dev_priv);
|
i915_gem_set_wedged(dev_priv);
|
||||||
goto wakeup;
|
goto finish;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int i915_pm_suspend(struct device *kdev)
|
static int i915_pm_suspend(struct device *kdev)
|
||||||
|
|
|
@ -3441,6 +3441,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
|
||||||
void i915_gem_reset(struct drm_i915_private *dev_priv);
|
void i915_gem_reset(struct drm_i915_private *dev_priv);
|
||||||
void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
|
void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
|
||||||
void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
|
void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
|
||||||
|
bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv);
|
||||||
|
|
||||||
void i915_gem_init_mmio(struct drm_i915_private *i915);
|
void i915_gem_init_mmio(struct drm_i915_private *i915);
|
||||||
int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
|
int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
|
||||||
|
|
|
@ -2997,6 +2997,65 @@ void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
|
||||||
mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
|
mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool i915_gem_unset_wedged(struct drm_i915_private *i915)
|
||||||
|
{
|
||||||
|
struct i915_gem_timeline *tl;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
lockdep_assert_held(&i915->drm.struct_mutex);
|
||||||
|
if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
/* Before unwedging, make sure that all pending operations
|
||||||
|
* are flushed and errored out - we may have requests waiting upon
|
||||||
|
* third party fences. We marked all inflight requests as EIO, and
|
||||||
|
* every execbuf since returned EIO, for consistency we want all
|
||||||
|
* the currently pending requests to also be marked as EIO, which
|
||||||
|
* is done inside our nop_submit_request - and so we must wait.
|
||||||
|
*
|
||||||
|
* No more can be submitted until we reset the wedged bit.
|
||||||
|
*/
|
||||||
|
list_for_each_entry(tl, &i915->gt.timelines, link) {
|
||||||
|
for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
|
||||||
|
struct drm_i915_gem_request *rq;
|
||||||
|
|
||||||
|
rq = i915_gem_active_peek(&tl->engine[i].last_request,
|
||||||
|
&i915->drm.struct_mutex);
|
||||||
|
if (!rq)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* We can't use our normal waiter as we want to
|
||||||
|
* avoid recursively trying to handle the current
|
||||||
|
* reset. The basic dma_fence_default_wait() installs
|
||||||
|
* a callback for dma_fence_signal(), which is
|
||||||
|
* triggered by our nop handler (indirectly, the
|
||||||
|
* callback enables the signaler thread which is
|
||||||
|
* woken by the nop_submit_request() advancing the seqno
|
||||||
|
* and when the seqno passes the fence, the signaler
|
||||||
|
* then signals the fence waking us up).
|
||||||
|
*/
|
||||||
|
if (dma_fence_default_wait(&rq->fence, true,
|
||||||
|
MAX_SCHEDULE_TIMEOUT) < 0)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Undo nop_submit_request. We prevent all new i915 requests from
|
||||||
|
* being queued (by disallowing execbuf whilst wedged) so having
|
||||||
|
* waited for all active requests above, we know the system is idle
|
||||||
|
* and do not have to worry about a thread being inside
|
||||||
|
* engine->submit_request() as we swap over. So unlike installing
|
||||||
|
* the nop_submit_request on reset, we can do this from normal
|
||||||
|
* context and do not require stop_machine().
|
||||||
|
*/
|
||||||
|
intel_engines_reset_default_submission(i915);
|
||||||
|
|
||||||
|
smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
|
||||||
|
clear_bit(I915_WEDGED, &i915->gpu_error.flags);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
i915_gem_retire_work_handler(struct work_struct *work)
|
i915_gem_retire_work_handler(struct work_struct *work)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue