diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9c499edb4c13..d92e7ab0005e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2871,6 +2871,14 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
 	return 0;
 }
 
+static bool match_ring(struct i915_request *rq)
+{
+	struct drm_i915_private *dev_priv = rq->i915;
+	u32 ring = I915_READ(RING_START(rq->engine->mmio_base));
+
+	return ring == i915_ggtt_offset(rq->ring->vma);
+}
+
 struct i915_request *
 i915_gem_find_active_request(struct intel_engine_cs *engine)
 {
@@ -2893,6 +2901,13 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
 		if (i915_request_completed(request))
 			continue;
 
+		if (!i915_request_started(request))
+			break;
+
+		/* More than one preemptible request may match! */
+		if (!match_ring(request))
+			break;
+
 		active = request;
 		break;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index f250109e1f66..8eedf7cac493 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1976,6 +1976,18 @@ static int eb_submit(struct i915_execbuffer *eb)
 			return err;
 	}
 
+	/*
+	 * Once we have completed waiting for other engines (using HW
+	 * semaphores), we can signal that this request/batch is ready to run.
+	 * This allows us to determine if the batch is still waiting on the
+	 * GPU or actually running by checking the breadcrumb.
+	 */
+	if (eb->engine->emit_init_breadcrumb) {
+		err = eb->engine->emit_init_breadcrumb(eb->request);
+		if (err)
+			return err;
+	}
+
 	err = eb->engine->emit_bb_start(eb->request,
 					eb->batch->node.start +
 					eb->batch_start_offset,
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 4d58770e6a8c..7db15b7b3de8 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -333,7 +333,7 @@ void i915_request_retire_upto(struct i915_request *rq)
 
 static u32 timeline_get_seqno(struct i915_timeline *tl)
 {
-	return ++tl->seqno;
+	return tl->seqno += 1 + tl->has_initial_breadcrumb;
 }
 
 static void move_to_timeline(struct i915_request *request,
@@ -382,8 +382,8 @@ void __i915_request_submit(struct i915_request *request)
 		intel_engine_enable_signaling(request, false);
 	spin_unlock(&request->lock);
 
-	engine->emit_breadcrumb(request,
-				request->ring->vaddr + request->postfix);
+	engine->emit_fini_breadcrumb(request,
+				     request->ring->vaddr + request->postfix);
 
 	/* Transfer from per-context onto the global per-engine timeline */
 	move_to_timeline(request, &engine->timeline);
@@ -657,7 +657,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 * around inside i915_request_add() there is sufficient space at
 	 * the beginning of the ring as well.
 	 */
-	rq->reserved_space = 2 * engine->emit_breadcrumb_dw * sizeof(u32);
+	rq->reserved_space = 2 * engine->emit_fini_breadcrumb_dw * sizeof(u32);
 
 	/*
 	 * Record the position of the start of the request so that
@@ -908,7 +908,7 @@ void i915_request_add(struct i915_request *request)
 	 * GPU processing the request, we never over-estimate the
 	 * position of the ring's HEAD.
	 */
-	cs = intel_ring_begin(request, engine->emit_breadcrumb_dw);
+	cs = intel_ring_begin(request, engine->emit_fini_breadcrumb_dw);
 	GEM_BUG_ON(IS_ERR(cs));
 	request->postfix = intel_ring_offset(request, cs);
 
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 96c586d6ff4d..340d6216791c 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -344,6 +344,7 @@ static inline bool i915_request_started(const struct i915_request *rq)
 	if (i915_request_signaled(rq))
 		return true;
 
+	/* Remember: started but may have since been preempted! */
 	return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 79838d89bdb9..5ea3af393ffe 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -135,6 +135,7 @@ int i915_timeline_init(struct drm_i915_private *i915,
 	timeline->i915 = i915;
 	timeline->name = name;
 	timeline->pin_count = 0;
+	timeline->has_initial_breadcrumb = !hwsp;
 	timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
 
 	if (!hwsp) {
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index ab736e2e5707..8caeb66d1cd5 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -48,6 +48,8 @@ struct i915_timeline {
 	struct i915_vma *hwsp_ggtt;
 	u32 hwsp_offset;
 
+	bool has_initial_breadcrumb;
+
 	/**
 	 * List of breadcrumbs associated with GPU requests currently
 	 * outstanding.
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index ead9c4371fe1..8dca76f6315d 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -664,7 +664,7 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
 	if (dw < 0)
 		goto out_timeline;
 
-	dw = engine->emit_breadcrumb(&frame->rq, frame->cs) - frame->cs;
+	dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
 
 	i915_timeline_unpin(&frame->timeline);
 
@@ -725,7 +725,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 	if (ret < 0)
 		goto err_breadcrumbs;
 
-	engine->emit_breadcrumb_dw = ret;
+	engine->emit_fini_breadcrumb_dw = ret;
 
 	return 0;
 
@@ -1297,7 +1297,9 @@ static void print_request(struct drm_printer *m,
 	drm_printf(m, "%s%x%s [%llx:%llx]%s @ %dms: %s\n",
 		   prefix,
 		   rq->global_seqno,
-		   i915_request_completed(rq) ? "!" : "",
+		   i915_request_completed(rq) ? "!" :
+		   i915_request_started(rq) ? "*" :
+		   "",
 		   rq->fence.context, rq->fence.seqno,
 		   buf,
 		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index fdbb3fe8eac9..5db16dd8e844 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -624,7 +624,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
				 * WaIdleLiteRestore:bdw,skl
				 * Apply the wa NOOPs to prevent
				 * ring:HEAD == rq:TAIL as we resubmit the
-				 * request. See gen8_emit_breadcrumb() for
+				 * request. See gen8_emit_fini_breadcrumb() for
				 * where we prepare the padding after the
				 * end of the request.
				 */
@@ -1283,6 +1283,34 @@ execlists_context_pin(struct intel_engine_cs *engine,
 	return __execlists_context_pin(engine, ctx, ce);
 }
 
+static int gen8_emit_init_breadcrumb(struct i915_request *rq)
+{
+	u32 *cs;
+
+	GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb);
+
+	cs = intel_ring_begin(rq, 6);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	/*
+	 * Check if we have been preempted before we even get started.
+	 *
+	 * After this point i915_request_started() reports true, even if
+	 * we get preempted and so are no longer running.
+	 */
+	*cs++ = MI_ARB_CHECK;
+	*cs++ = MI_NOOP;
+
+	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+	*cs++ = rq->timeline->hwsp_offset;
+	*cs++ = 0;
+	*cs++ = rq->fence.seqno - 1;
+
+	intel_ring_advance(rq, cs);
+	return 0;
+}
+
 static int emit_pdps(struct i915_request *rq)
 {
 	const struct intel_engine_cs * const engine = rq->engine;
@@ -2039,7 +2067,7 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
 	return cs;
 }
 
-static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
+static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 {
 	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
 	BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
@@ -2061,7 +2089,7 @@ static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
 	return gen8_emit_wa_tail(request, cs);
 }
 
-static u32 *gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
+static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 {
 	cs = gen8_emit_ggtt_write_rcs(cs,
 				      request->fence.seqno,
@@ -2176,7 +2204,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 	engine->request_alloc = execlists_request_alloc;
 
 	engine->emit_flush = gen8_emit_flush;
-	engine->emit_breadcrumb = gen8_emit_breadcrumb;
+	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
+	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
 
 	engine->set_default_submission = intel_execlists_set_default_submission;
 
@@ -2289,7 +2318,7 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
 	/* Override some for render ring. */
 	engine->init_context = gen8_init_rcs_context;
 	engine->emit_flush = gen8_emit_flush_render;
-	engine->emit_breadcrumb = gen8_emit_breadcrumb_rcs;
+	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
 
 	ret = logical_ring_init(engine);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index ee3719324e2d..668ed67336a2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1607,6 +1607,7 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 		err = PTR_ERR(timeline);
 		goto err;
 	}
+	GEM_BUG_ON(timeline->has_initial_breadcrumb);
 
 	ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);
 	i915_timeline_put(timeline);
@@ -1960,6 +1961,7 @@ static int ring_request_alloc(struct i915_request *request)
 	int ret;
 
 	GEM_BUG_ON(!request->hw_context->pin_count);
+	GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
 
 	/*
 	 * Flush enough space to reduce the likelihood of waiting after
@@ -2296,9 +2298,14 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 	engine->context_pin = intel_ring_context_pin;
 	engine->request_alloc = ring_request_alloc;
 
-	engine->emit_breadcrumb = i9xx_emit_breadcrumb;
+	/*
+	 * Using a global execution timeline; the previous final breadcrumb is
+	 * equivalent to our next initial breadcrumb, so we can elide
+	 * engine->emit_init_breadcrumb().
+	 */
+	engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb;
 	if (IS_GEN(dev_priv, 5))
-		engine->emit_breadcrumb = gen5_emit_breadcrumb;
+		engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
 
 	engine->set_default_submission = i9xx_set_default_submission;
 
@@ -2327,11 +2334,11 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
 	if (INTEL_GEN(dev_priv) >= 7) {
 		engine->init_context = intel_rcs_ctx_init;
 		engine->emit_flush = gen7_render_ring_flush;
-		engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb;
+		engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb;
 	} else if (IS_GEN(dev_priv, 6)) {
 		engine->init_context = intel_rcs_ctx_init;
 		engine->emit_flush = gen6_render_ring_flush;
-		engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb;
+		engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb;
 	} else if (IS_GEN(dev_priv, 5)) {
 		engine->emit_flush = gen4_render_ring_flush;
 	} else {
@@ -2368,9 +2375,9 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
 		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
 
 		if (IS_GEN(dev_priv, 6))
-			engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
+			engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
 		else
-			engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
+			engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
 	} else {
 		engine->emit_flush = bsd_ring_flush;
 		if (IS_GEN(dev_priv, 5))
@@ -2394,9 +2401,9 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
 	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
 
 	if (IS_GEN(dev_priv, 6))
-		engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
+		engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
 	else
-		engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
+		engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
 
 	return intel_init_ring_buffer(engine);
 }
@@ -2414,7 +2421,7 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
 	engine->irq_enable = hsw_vebox_irq_enable;
 	engine->irq_disable = hsw_vebox_irq_disable;
 
-	engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
+	engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
 
 	return intel_init_ring_buffer(engine);
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2927b712b973..1f30ffb84936 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -463,8 +463,10 @@ struct intel_engine_cs {
					 unsigned int dispatch_flags);
 #define I915_DISPATCH_SECURE BIT(0)
 #define I915_DISPATCH_PINNED BIT(1)
-	u32 *(*emit_breadcrumb)(struct i915_request *rq, u32 *cs);
-	int emit_breadcrumb_dw;
+	int (*emit_init_breadcrumb)(struct i915_request *rq);
+	u32 *(*emit_fini_breadcrumb)(struct i915_request *rq,
+				     u32 *cs);
+	unsigned int emit_fini_breadcrumb_dw;
 
 	/* Pass the request to the hardware queue (e.g. directly into
	 * the legacy ringbuffer or to the end of an execlist).
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 95e890d7f58b..3b226ebc6bc4 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -227,7 +227,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	engine->base.context_pin = mock_context_pin;
 	engine->base.request_alloc = mock_request_alloc;
 	engine->base.emit_flush = mock_emit_flush;
-	engine->base.emit_breadcrumb = mock_emit_breadcrumb;
+	engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb;
 	engine->base.submit_request = mock_submit_request;
 
 	if (i915_timeline_init(i915,
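
Illustrative note (not part of the patch): a minimal userspace sketch of the
seqno bookkeeping the changes above rely on. With an initial breadcrumb the
timeline seqno advances by two per request (timeline_get_seqno() above), the
initial breadcrumb writes fence.seqno - 1 into the timeline's HWSP slot when
the batch starts, and the final breadcrumb writes fence.seqno on completion,
so "started" and "completed" become wrap-safe comparisons against that slot.
The seqno_passed() helper below is only assumed to mirror i915_seqno_passed();
everything else is a toy model, not kernel code.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Wrap-safe "a is at or after b"; assumed to match i915_seqno_passed(). */
	static bool seqno_passed(uint32_t a, uint32_t b)
	{
		return (int32_t)(a - b) >= 0;
	}

	int main(void)
	{
		uint32_t hwsp = 0;		/* per-timeline HWSP seqno slot */
		uint32_t timeline_seqno = 0;

		/* timeline_get_seqno() with has_initial_breadcrumb: advance by 2 */
		uint32_t fence_seqno = (timeline_seqno += 2);

		/* queued: neither breadcrumb has executed yet */
		printf("queued:    started=%d completed=%d\n",
		       seqno_passed(hwsp, fence_seqno - 1),
		       seqno_passed(hwsp, fence_seqno));

		hwsp = fence_seqno - 1;	/* initial breadcrumb (gen8_emit_init_breadcrumb) */
		printf("running:   started=%d completed=%d\n",
		       seqno_passed(hwsp, fence_seqno - 1),
		       seqno_passed(hwsp, fence_seqno));

		hwsp = fence_seqno;	/* final breadcrumb (emit_fini_breadcrumb) */
		printf("completed: started=%d completed=%d\n",
		       seqno_passed(hwsp, fence_seqno - 1),
		       seqno_passed(hwsp, fence_seqno));

		return 0;
	}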