drm/i915/guc: Don't hog IRQs when destroying contexts
While attempting to debug a CT deadlock issue in various CI failures (most easily reproduced with gem_ctx_create/basic-files), I was seeing CPU deadlock errors being reported. This were because the context destroy loop was blocking waiting on H2G space from inside an IRQ spinlock. There no was deadlock as such, it's just that the H2G queue was full of context destroy commands and GuC was taking a long time to process them. However, the kernel was seeing the large amount of time spent inside the IRQ lock as a dead CPU. Various Bad Things(tm) would then happen (heartbeat failures, CT deadlock errors, outstanding H2G WARNs, etc.). Re-working the loop to only acquire the spinlock around the list management (which is all it is meant to protect) rather than the entire destroy operation seems to fix all the above issues. v2: (John Harrison) - Fix typo in comment message Signed-off-by: John Harrison <John.C.Harrison@Intel.com> Signed-off-by: Matthew Brost <matthew.brost@intel.com> Reviewed-by: Matthew Brost <matthew.brost@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20211214170500.28569-5-matthew.brost@intel.com
This commit is contained in:
parent
7aa6d5fe6c
commit
2406846ec4
|
@ -2644,7 +2644,6 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
|
|||
unsigned long flags;
|
||||
bool disabled;
|
||||
|
||||
lockdep_assert_held(&guc->submission_state.lock);
|
||||
GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
|
||||
GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id));
|
||||
GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
|
||||
|
@ -2660,7 +2659,7 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
|
|||
}
|
||||
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
|
||||
if (unlikely(disabled)) {
|
||||
__release_guc_id(guc, ce);
|
||||
release_guc_id(guc, ce);
|
||||
__guc_context_destroy(ce);
|
||||
return;
|
||||
}
|
||||
|
@ -2694,36 +2693,48 @@ static void __guc_context_destroy(struct intel_context *ce)
|
|||
|
||||
static void guc_flush_destroyed_contexts(struct intel_guc *guc)
|
||||
{
|
||||
struct intel_context *ce, *cn;
|
||||
struct intel_context *ce;
|
||||
unsigned long flags;
|
||||
|
||||
GEM_BUG_ON(!submission_disabled(guc) &&
|
||||
guc_submission_initialized(guc));
|
||||
|
||||
spin_lock_irqsave(&guc->submission_state.lock, flags);
|
||||
list_for_each_entry_safe(ce, cn,
|
||||
&guc->submission_state.destroyed_contexts,
|
||||
destroyed_link) {
|
||||
list_del_init(&ce->destroyed_link);
|
||||
__release_guc_id(guc, ce);
|
||||
while (!list_empty(&guc->submission_state.destroyed_contexts)) {
|
||||
spin_lock_irqsave(&guc->submission_state.lock, flags);
|
||||
ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
|
||||
struct intel_context,
|
||||
destroyed_link);
|
||||
if (ce)
|
||||
list_del_init(&ce->destroyed_link);
|
||||
spin_unlock_irqrestore(&guc->submission_state.lock, flags);
|
||||
|
||||
if (!ce)
|
||||
break;
|
||||
|
||||
release_guc_id(guc, ce);
|
||||
__guc_context_destroy(ce);
|
||||
}
|
||||
spin_unlock_irqrestore(&guc->submission_state.lock, flags);
|
||||
}
|
||||
|
||||
static void deregister_destroyed_contexts(struct intel_guc *guc)
|
||||
{
|
||||
struct intel_context *ce, *cn;
|
||||
struct intel_context *ce;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&guc->submission_state.lock, flags);
|
||||
list_for_each_entry_safe(ce, cn,
|
||||
&guc->submission_state.destroyed_contexts,
|
||||
destroyed_link) {
|
||||
list_del_init(&ce->destroyed_link);
|
||||
while (!list_empty(&guc->submission_state.destroyed_contexts)) {
|
||||
spin_lock_irqsave(&guc->submission_state.lock, flags);
|
||||
ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
|
||||
struct intel_context,
|
||||
destroyed_link);
|
||||
if (ce)
|
||||
list_del_init(&ce->destroyed_link);
|
||||
spin_unlock_irqrestore(&guc->submission_state.lock, flags);
|
||||
|
||||
if (!ce)
|
||||
break;
|
||||
|
||||
guc_lrc_desc_unpin(ce);
|
||||
}
|
||||
spin_unlock_irqrestore(&guc->submission_state.lock, flags);
|
||||
}
|
||||
|
||||
static void destroyed_worker_func(struct work_struct *w)
|
||||
|
|
Loading…
Reference in New Issue