drm/i915: Identify active requests

To allow requests to forgo a common execution timeline, one question we
need to be able to answer is "is this request running?". To track
whether a request has started on HW, we can emit a breadcrumb at the
beginning of the request and check its timeline's HWSP to see if the
breadcrumb has advanced past the start of this request. (This is in
contrast to the global timeline where we need only ask if we are on the
global timeline and if the timeline has advanced past the end of the
previous request.)

There is still confusion from a preempted request, which has already
started but relinquished the HW to a higher priority request. For the
common case, this discrepancy should be negligible. However, for
identification of hung requests, knowing which one was running at the
time of the hang will be much more important.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190129185452.20989-2-chris@chris-wilson.co.uk
This commit is contained in:
Chris Wilson 2019-01-29 18:54:50 +00:00
parent 06039d9820
commit 8547444137
11 changed files with 96 additions and 25 deletions

View File

@ -2871,6 +2871,14 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
return 0; return 0;
} }
static bool match_ring(struct i915_request *rq)
{
struct drm_i915_private *dev_priv = rq->i915;
u32 ring = I915_READ(RING_START(rq->engine->mmio_base));
return ring == i915_ggtt_offset(rq->ring->vma);
}
struct i915_request * struct i915_request *
i915_gem_find_active_request(struct intel_engine_cs *engine) i915_gem_find_active_request(struct intel_engine_cs *engine)
{ {
@ -2893,6 +2901,13 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
if (i915_request_completed(request)) if (i915_request_completed(request))
continue; continue;
if (!i915_request_started(request))
break;
/* More than one preemptible request may match! */
if (!match_ring(request))
break;
active = request; active = request;
break; break;
} }

View File

@ -1976,6 +1976,18 @@ static int eb_submit(struct i915_execbuffer *eb)
return err; return err;
} }
/*
* After we completed waiting for other engines (using HW semaphores)
* then we can signal that this request/batch is ready to run. This
* allows us to determine if the batch is still waiting on the GPU
* or actually running by checking the breadcrumb.
*/
if (eb->engine->emit_init_breadcrumb) {
err = eb->engine->emit_init_breadcrumb(eb->request);
if (err)
return err;
}
err = eb->engine->emit_bb_start(eb->request, err = eb->engine->emit_bb_start(eb->request,
eb->batch->node.start + eb->batch->node.start +
eb->batch_start_offset, eb->batch_start_offset,

View File

@ -333,7 +333,7 @@ void i915_request_retire_upto(struct i915_request *rq)
static u32 timeline_get_seqno(struct i915_timeline *tl) static u32 timeline_get_seqno(struct i915_timeline *tl)
{ {
return ++tl->seqno; return tl->seqno += 1 + tl->has_initial_breadcrumb;
} }
static void move_to_timeline(struct i915_request *request, static void move_to_timeline(struct i915_request *request,
@ -382,8 +382,8 @@ void __i915_request_submit(struct i915_request *request)
intel_engine_enable_signaling(request, false); intel_engine_enable_signaling(request, false);
spin_unlock(&request->lock); spin_unlock(&request->lock);
engine->emit_breadcrumb(request, engine->emit_fini_breadcrumb(request,
request->ring->vaddr + request->postfix); request->ring->vaddr + request->postfix);
/* Transfer from per-context onto the global per-engine timeline */ /* Transfer from per-context onto the global per-engine timeline */
move_to_timeline(request, &engine->timeline); move_to_timeline(request, &engine->timeline);
@ -657,7 +657,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
* around inside i915_request_add() there is sufficient space at * around inside i915_request_add() there is sufficient space at
* the beginning of the ring as well. * the beginning of the ring as well.
*/ */
rq->reserved_space = 2 * engine->emit_breadcrumb_dw * sizeof(u32); rq->reserved_space = 2 * engine->emit_fini_breadcrumb_dw * sizeof(u32);
/* /*
* Record the position of the start of the request so that * Record the position of the start of the request so that
@ -908,7 +908,7 @@ void i915_request_add(struct i915_request *request)
* GPU processing the request, we never over-estimate the * GPU processing the request, we never over-estimate the
* position of the ring's HEAD. * position of the ring's HEAD.
*/ */
cs = intel_ring_begin(request, engine->emit_breadcrumb_dw); cs = intel_ring_begin(request, engine->emit_fini_breadcrumb_dw);
GEM_BUG_ON(IS_ERR(cs)); GEM_BUG_ON(IS_ERR(cs));
request->postfix = intel_ring_offset(request, cs); request->postfix = intel_ring_offset(request, cs);

View File

@ -344,6 +344,7 @@ static inline bool i915_request_started(const struct i915_request *rq)
if (i915_request_signaled(rq)) if (i915_request_signaled(rq))
return true; return true;
/* Remember: started but may have since been preempted! */
return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1); return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1);
} }

View File

@ -135,6 +135,7 @@ int i915_timeline_init(struct drm_i915_private *i915,
timeline->i915 = i915; timeline->i915 = i915;
timeline->name = name; timeline->name = name;
timeline->pin_count = 0; timeline->pin_count = 0;
timeline->has_initial_breadcrumb = !hwsp;
timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
if (!hwsp) { if (!hwsp) {

View File

@ -48,6 +48,8 @@ struct i915_timeline {
struct i915_vma *hwsp_ggtt; struct i915_vma *hwsp_ggtt;
u32 hwsp_offset; u32 hwsp_offset;
bool has_initial_breadcrumb;
/** /**
* List of breadcrumbs associated with GPU requests currently * List of breadcrumbs associated with GPU requests currently
* outstanding. * outstanding.

View File

@ -664,7 +664,7 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
if (dw < 0) if (dw < 0)
goto out_timeline; goto out_timeline;
dw = engine->emit_breadcrumb(&frame->rq, frame->cs) - frame->cs; dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
i915_timeline_unpin(&frame->timeline); i915_timeline_unpin(&frame->timeline);
@ -725,7 +725,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
if (ret < 0) if (ret < 0)
goto err_breadcrumbs; goto err_breadcrumbs;
engine->emit_breadcrumb_dw = ret; engine->emit_fini_breadcrumb_dw = ret;
return 0; return 0;
@ -1297,7 +1297,9 @@ static void print_request(struct drm_printer *m,
drm_printf(m, "%s%x%s [%llx:%llx]%s @ %dms: %s\n", drm_printf(m, "%s%x%s [%llx:%llx]%s @ %dms: %s\n",
prefix, prefix,
rq->global_seqno, rq->global_seqno,
i915_request_completed(rq) ? "!" : "", i915_request_completed(rq) ? "!" :
i915_request_started(rq) ? "*" :
"",
rq->fence.context, rq->fence.seqno, rq->fence.context, rq->fence.seqno,
buf, buf,
jiffies_to_msecs(jiffies - rq->emitted_jiffies), jiffies_to_msecs(jiffies - rq->emitted_jiffies),

View File

@ -624,7 +624,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* WaIdleLiteRestore:bdw,skl * WaIdleLiteRestore:bdw,skl
* Apply the wa NOOPs to prevent * Apply the wa NOOPs to prevent
* ring:HEAD == rq:TAIL as we resubmit the * ring:HEAD == rq:TAIL as we resubmit the
* request. See gen8_emit_breadcrumb() for * request. See gen8_emit_fini_breadcrumb() for
* where we prepare the padding after the * where we prepare the padding after the
* end of the request. * end of the request.
*/ */
@ -1283,6 +1283,34 @@ execlists_context_pin(struct intel_engine_cs *engine,
return __execlists_context_pin(engine, ctx, ce); return __execlists_context_pin(engine, ctx, ce);
} }
/*
 * Emit the initial breadcrumb for @rq into its ring.
 *
 * Reserves six dwords with intel_ring_begin() and fills them with an
 * MI_ARB_CHECK (padded by an MI_NOOP) followed by a store-dword-immediate
 * that writes (rq->fence.seqno - 1) to the timeline's hwsp_offset.
 *
 * Returns 0 on success, or the error carried by the pointer returned from
 * intel_ring_begin() when the ring space could not be reserved.
 */
static int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
u32 *cs;
/* Only requests on a timeline flagged with has_initial_breadcrumb may get here. */
GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb);
/* Six dwords: one for each write through cs below. */
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
/*
* Check if we have been preempted before we even get started.
*
* After this point i915_request_started() reports true, even if
* we get preempted and so are no longer running.
*/
*cs++ = MI_ARB_CHECK;
*cs++ = MI_NOOP;
/*
 * Store-dword packet: command, then hwsp_offset and 0, then the value.
 * NOTE(review): hwsp_offset/0 are presumably the low/high address
 * dwords of a GGTT address -- confirm against the command layout.
 */
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = rq->timeline->hwsp_offset;
*cs++ = 0;
/* The value stored is one less than this request's fence seqno. */
*cs++ = rq->fence.seqno - 1;
intel_ring_advance(rq, cs);
return 0;
}
static int emit_pdps(struct i915_request *rq) static int emit_pdps(struct i915_request *rq)
{ {
const struct intel_engine_cs * const engine = rq->engine; const struct intel_engine_cs * const engine = rq->engine;
@ -2039,7 +2067,7 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
return cs; return cs;
} }
static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
{ {
/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
@ -2061,7 +2089,7 @@ static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
return gen8_emit_wa_tail(request, cs); return gen8_emit_wa_tail(request, cs);
} }
static u32 *gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{ {
cs = gen8_emit_ggtt_write_rcs(cs, cs = gen8_emit_ggtt_write_rcs(cs,
request->fence.seqno, request->fence.seqno,
@ -2176,7 +2204,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
engine->request_alloc = execlists_request_alloc; engine->request_alloc = execlists_request_alloc;
engine->emit_flush = gen8_emit_flush; engine->emit_flush = gen8_emit_flush;
engine->emit_breadcrumb = gen8_emit_breadcrumb; engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
engine->set_default_submission = intel_execlists_set_default_submission; engine->set_default_submission = intel_execlists_set_default_submission;
@ -2289,7 +2318,7 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
/* Override some for render ring. */ /* Override some for render ring. */
engine->init_context = gen8_init_rcs_context; engine->init_context = gen8_init_rcs_context;
engine->emit_flush = gen8_emit_flush_render; engine->emit_flush = gen8_emit_flush_render;
engine->emit_breadcrumb = gen8_emit_breadcrumb_rcs; engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
ret = logical_ring_init(engine); ret = logical_ring_init(engine);
if (ret) if (ret)

View File

@ -1607,6 +1607,7 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
err = PTR_ERR(timeline); err = PTR_ERR(timeline);
goto err; goto err;
} }
GEM_BUG_ON(timeline->has_initial_breadcrumb);
ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE); ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);
i915_timeline_put(timeline); i915_timeline_put(timeline);
@ -1960,6 +1961,7 @@ static int ring_request_alloc(struct i915_request *request)
int ret; int ret;
GEM_BUG_ON(!request->hw_context->pin_count); GEM_BUG_ON(!request->hw_context->pin_count);
GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
/* /*
* Flush enough space to reduce the likelihood of waiting after * Flush enough space to reduce the likelihood of waiting after
@ -2296,9 +2298,14 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
engine->context_pin = intel_ring_context_pin; engine->context_pin = intel_ring_context_pin;
engine->request_alloc = ring_request_alloc; engine->request_alloc = ring_request_alloc;
engine->emit_breadcrumb = i9xx_emit_breadcrumb; /*
* Using a global execution timeline; the previous final breadcrumb is
* equivalent to our next initial breadcrumb so we can elide
* engine->emit_init_breadcrumb().
*/
engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb;
if (IS_GEN(dev_priv, 5)) if (IS_GEN(dev_priv, 5))
engine->emit_breadcrumb = gen5_emit_breadcrumb; engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
engine->set_default_submission = i9xx_set_default_submission; engine->set_default_submission = i9xx_set_default_submission;
@ -2327,11 +2334,11 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
if (INTEL_GEN(dev_priv) >= 7) { if (INTEL_GEN(dev_priv) >= 7) {
engine->init_context = intel_rcs_ctx_init; engine->init_context = intel_rcs_ctx_init;
engine->emit_flush = gen7_render_ring_flush; engine->emit_flush = gen7_render_ring_flush;
engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb; engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb;
} else if (IS_GEN(dev_priv, 6)) { } else if (IS_GEN(dev_priv, 6)) {
engine->init_context = intel_rcs_ctx_init; engine->init_context = intel_rcs_ctx_init;
engine->emit_flush = gen6_render_ring_flush; engine->emit_flush = gen6_render_ring_flush;
engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb; engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb;
} else if (IS_GEN(dev_priv, 5)) { } else if (IS_GEN(dev_priv, 5)) {
engine->emit_flush = gen4_render_ring_flush; engine->emit_flush = gen4_render_ring_flush;
} else { } else {
@ -2368,9 +2375,9 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
if (IS_GEN(dev_priv, 6)) if (IS_GEN(dev_priv, 6))
engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
else else
engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
} else { } else {
engine->emit_flush = bsd_ring_flush; engine->emit_flush = bsd_ring_flush;
if (IS_GEN(dev_priv, 5)) if (IS_GEN(dev_priv, 5))
@ -2394,9 +2401,9 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
if (IS_GEN(dev_priv, 6)) if (IS_GEN(dev_priv, 6))
engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb; engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
else else
engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
return intel_init_ring_buffer(engine); return intel_init_ring_buffer(engine);
} }
@ -2414,7 +2421,7 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
engine->irq_enable = hsw_vebox_irq_enable; engine->irq_enable = hsw_vebox_irq_enable;
engine->irq_disable = hsw_vebox_irq_disable; engine->irq_disable = hsw_vebox_irq_disable;
engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb; engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
return intel_init_ring_buffer(engine); return intel_init_ring_buffer(engine);
} }

View File

@ -463,8 +463,10 @@ struct intel_engine_cs {
unsigned int dispatch_flags); unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0) #define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1) #define I915_DISPATCH_PINNED BIT(1)
u32 *(*emit_breadcrumb)(struct i915_request *rq, u32 *cs); int (*emit_init_breadcrumb)(struct i915_request *rq);
int emit_breadcrumb_dw; u32 *(*emit_fini_breadcrumb)(struct i915_request *rq,
u32 *cs);
unsigned int emit_fini_breadcrumb_dw;
/* Pass the request to the hardware queue (e.g. directly into /* Pass the request to the hardware queue (e.g. directly into
* the legacy ringbuffer or to the end of an execlist). * the legacy ringbuffer or to the end of an execlist).

View File

@ -227,7 +227,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
engine->base.context_pin = mock_context_pin; engine->base.context_pin = mock_context_pin;
engine->base.request_alloc = mock_request_alloc; engine->base.request_alloc = mock_request_alloc;
engine->base.emit_flush = mock_emit_flush; engine->base.emit_flush = mock_emit_flush;
engine->base.emit_breadcrumb = mock_emit_breadcrumb; engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb;
engine->base.submit_request = mock_submit_request; engine->base.submit_request = mock_submit_request;
if (i915_timeline_init(i915, if (i915_timeline_init(i915,