drm/i915/gt: Unlock engine-pm after queuing the kernel context switch
In commita79ca656b6
("drm/i915: Push the wakeref->count deferral to the backend"), I erroneously concluded that we last modify the engine inside __i915_request_commit() meaning that we could enable concurrent submission for userspace as we enqueued this request. However, this falls into a trap with other users of the engine->kernel_context waking up and submitting their request before the idle-switch is queued, with the result that the kernel_context is executed out-of-sequence most likely upsetting the GPU and certainly ourselves when we try to retire the out-of-sequence requests. As such we need to hold onto the effective engine->kernel_context mutex lock (via the engine pm mutex proxy) until we have finish queuing the request to the engine. v2: Serialise against concurrent intel_gt_retire_requests() v3: Describe the hairy locking scheme with intel_gt_retire_requests() for future reference. v4: Combine timeline->lock and engine pm release; it's hairy. Fixes:a79ca656b6
("drm/i915: Push the wakeref->count deferral to the backend") Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20191120165514.3955081-2-chris@chris-wilson.co.uk
This commit is contained in:
parent
a6edbca74b
commit
5cba288466
|
@ -73,8 +73,25 @@ static inline void __timeline_mark_unlock(struct intel_context *ce,
|
||||||
|
|
||||||
#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
|
#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
|
||||||
|
|
||||||
|
static void
|
||||||
|
__intel_timeline_enter_and_release_pm(struct intel_timeline *tl,
|
||||||
|
struct intel_engine_cs *engine)
|
||||||
|
{
|
||||||
|
struct intel_gt_timelines *timelines = &engine->gt->timelines;
|
||||||
|
|
||||||
|
spin_lock(&timelines->lock);
|
||||||
|
|
||||||
|
if (!atomic_fetch_inc(&tl->active_count))
|
||||||
|
list_add_tail(&tl->link, &timelines->active_list);
|
||||||
|
|
||||||
|
__intel_wakeref_defer_park(&engine->wakeref);
|
||||||
|
|
||||||
|
spin_unlock(&timelines->lock);
|
||||||
|
}
|
||||||
|
|
||||||
static bool switch_to_kernel_context(struct intel_engine_cs *engine)
|
static bool switch_to_kernel_context(struct intel_engine_cs *engine)
|
||||||
{
|
{
|
||||||
|
struct intel_context *ce = engine->kernel_context;
|
||||||
struct i915_request *rq;
|
struct i915_request *rq;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
bool result = true;
|
bool result = true;
|
||||||
|
@ -98,16 +115,31 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
|
||||||
* This should hold true as we can only park the engine after
|
* This should hold true as we can only park the engine after
|
||||||
* retiring the last request, thus all rings should be empty and
|
* retiring the last request, thus all rings should be empty and
|
||||||
* all timelines idle.
|
* all timelines idle.
|
||||||
|
*
|
||||||
|
* For unlocking, there are 2 other parties and the GPU who have a
|
||||||
|
* stake here.
|
||||||
|
*
|
||||||
|
* A new gpu user will be waiting on the engine-pm to start their
|
||||||
|
* engine_unpark. New waiters are predicated on engine->wakeref.count
|
||||||
|
* and so intel_wakeref_defer_park() acts like a mutex_unlock of the
|
||||||
|
* engine->wakeref.
|
||||||
|
*
|
||||||
|
* The other party is intel_gt_retire_requests(), which is walking the
|
||||||
|
* list of active timelines looking for completions. Meanwhile as soon
|
||||||
|
* as we call __i915_request_queue(), the GPU may complete our request.
|
||||||
|
* Ergo, if we put ourselves on the timelines.active_list
|
||||||
|
* (se intel_timeline_enter()) before we increment the
|
||||||
|
* engine->wakeref.count, we may see the request completion and retire
|
||||||
|
* it causing an undeflow of the engine->wakeref.
|
||||||
*/
|
*/
|
||||||
flags = __timeline_mark_lock(engine->kernel_context);
|
flags = __timeline_mark_lock(ce);
|
||||||
|
GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
|
||||||
|
|
||||||
rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT);
|
rq = __i915_request_create(ce, GFP_NOWAIT);
|
||||||
if (IS_ERR(rq))
|
if (IS_ERR(rq))
|
||||||
/* Context switch failed, hope for the best! Maybe reset? */
|
/* Context switch failed, hope for the best! Maybe reset? */
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
|
|
||||||
intel_timeline_enter(i915_request_timeline(rq));
|
|
||||||
|
|
||||||
/* Check again on the next retirement. */
|
/* Check again on the next retirement. */
|
||||||
engine->wakeref_serial = engine->serial + 1;
|
engine->wakeref_serial = engine->serial + 1;
|
||||||
i915_request_add_active_barriers(rq);
|
i915_request_add_active_barriers(rq);
|
||||||
|
@ -116,13 +148,14 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
|
||||||
rq->sched.attr.priority = I915_PRIORITY_BARRIER;
|
rq->sched.attr.priority = I915_PRIORITY_BARRIER;
|
||||||
__i915_request_commit(rq);
|
__i915_request_commit(rq);
|
||||||
|
|
||||||
/* Release our exclusive hold on the engine */
|
|
||||||
__intel_wakeref_defer_park(&engine->wakeref);
|
|
||||||
__i915_request_queue(rq, NULL);
|
__i915_request_queue(rq, NULL);
|
||||||
|
|
||||||
|
/* Expose ourselves to intel_gt_retire_requests() and new submission */
|
||||||
|
__intel_timeline_enter_and_release_pm(ce->timeline, engine);
|
||||||
|
|
||||||
result = false;
|
result = false;
|
||||||
out_unlock:
|
out_unlock:
|
||||||
__timeline_mark_unlock(engine->kernel_context, flags);
|
__timeline_mark_unlock(ce, flags);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue