diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 3d1d48bf90cf..6dd18f93d45c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -334,6 +334,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; + ewma__engine_latency_init(&engine->latency); seqlock_init(&engine->stats.lock); ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); @@ -1481,6 +1482,8 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count)); drm_printf(m, "\tBarriers?: %s\n", yesno(!llist_empty(&engine->barrier_tasks))); + drm_printf(m, "\tLatency: %luus\n", + ewma__engine_latency_read(&engine->latency)); rcu_read_lock(); rq = READ_ONCE(engine->heartbeat.systole); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index bcbda8e52d41..8fb7b34fc5a6 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -73,6 +73,15 @@ static inline void __timeline_mark_unlock(struct intel_context *ce, #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */ +static void duration(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct i915_request *rq = to_request(fence); + + ewma__engine_latency_add(&rq->engine->latency, + ktime_us_delta(rq->fence.timestamp, + rq->duration.emitted)); +} + static void __queue_and_release_pm(struct i915_request *rq, struct intel_timeline *tl, @@ -163,7 +172,18 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) /* Install ourselves as a preemption barrier */ rq->sched.attr.priority = I915_PRIORITY_BARRIER; - __i915_request_commit(rq); + if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */ + /* + * Use an interrupt for precise measurement of duration, + * otherwise we rely on someone else retiring all the requests + * which may delay the signaling (i.e. we will likely wait + * until the background request retirement running every + * second or two). + */ + BUILD_BUG_ON(sizeof(rq->duration) > sizeof(rq->submitq)); + dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration); + rq->duration.emitted = ktime_get(); + } /* Expose ourselves to the world */ __queue_and_release_pm(rq, ce->timeline, engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 17f1f1441efc..7f227da09d66 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -7,6 +7,7 @@ #ifndef __INTEL_ENGINE_TYPES__ #define __INTEL_ENGINE_TYPES__ +#include #include #include #include @@ -119,6 +120,9 @@ enum intel_engine_id { #define INVALID_ENGINE ((enum intel_engine_id)-1) }; +/* A simple estimator for the round-trip latency of an engine */ +DECLARE_EWMA(_engine_latency, 6, 4) + struct st_preempt_hang { struct completion completion; unsigned int count; @@ -316,6 +320,13 @@ struct intel_engine_cs { struct intel_timeline *timeline; } legacy; + /* + * We track the average duration of the idle pulse on parking the + * engine to keep an estimate of the how the fast the engine is + * under ideal conditions. + */ + struct ewma__engine_latency latency; + /* Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the * heavyweight seqno dance, we delegate the task (of being the diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index aa38290eea3d..c18c0bcd0193 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -150,6 +150,10 @@ struct i915_request { union { wait_queue_entry_t submitq; struct i915_sw_dma_fence_cb dmaq; + struct i915_request_duration_cb { + struct dma_fence_cb cb; + ktime_t emitted; + } duration; }; struct list_head execute_cb; struct i915_sw_fence semaphore;