diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 595539a09e38..261c9bd83f51 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -265,7 +265,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE | PIN_NONBLOCK /* NOWARN */ | - PIN_NOSEARCH); + PIN_NOEVICT); if (IS_ERR(vma)) { /* Use a partial view if it is bigger than available space */ struct i915_ggtt_view view = diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index a372d4ea9370..65b5ca74b394 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -39,27 +39,32 @@ static int __engine_unpark(struct intel_wakeref *wf) #if IS_ENABLED(CONFIG_LOCKDEP) -static inline void __timeline_mark_lock(struct intel_context *ce) +static inline unsigned long __timeline_mark_lock(struct intel_context *ce) { unsigned long flags; local_irq_save(flags); mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_); - local_irq_restore(flags); + + return flags; } -static inline void __timeline_mark_unlock(struct intel_context *ce) +static inline void __timeline_mark_unlock(struct intel_context *ce, + unsigned long flags) { mutex_release(&ce->timeline->mutex.dep_map, 0, _THIS_IP_); + local_irq_restore(flags); } #else -static inline void __timeline_mark_lock(struct intel_context *ce) +static inline unsigned long __timeline_mark_lock(struct intel_context *ce) { + return 0; } -static inline void __timeline_mark_unlock(struct intel_context *ce) +static inline void __timeline_mark_unlock(struct intel_context *ce, + unsigned long flags) { } @@ -68,6 +73,8 @@ static inline void __timeline_mark_unlock(struct intel_context *ce) static bool switch_to_kernel_context(struct intel_engine_cs *engine) { struct i915_request *rq; + unsigned long flags; + bool result = true; /* Already inside 
the kernel context, safe to power down. */ if (engine->wakeref_serial == engine->serial) @@ -89,12 +96,12 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) * retiring the last request, thus all rings should be empty and * all timelines idle. */ - __timeline_mark_lock(engine->kernel_context); + flags = __timeline_mark_lock(engine->kernel_context); rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT); if (IS_ERR(rq)) /* Context switch failed, hope for the best! Maybe reset? */ - return true; + goto out_unlock; intel_timeline_enter(rq->timeline); @@ -110,9 +117,10 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) __intel_wakeref_defer_park(&engine->wakeref); __i915_request_queue(rq, NULL); - __timeline_mark_unlock(engine->kernel_context); - - return false; + result = false; +out_unlock: + __timeline_mark_unlock(engine->kernel_context, flags); + return result; } static int __engine_park(struct intel_wakeref *wf) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 077716442c90..b9d84d52e986 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -792,6 +792,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) { struct intel_gt_timelines *timelines = &gt->timelines; struct intel_timeline *tl; + unsigned long flags; if (!test_bit(I915_WEDGED, &gt->reset.flags)) return true; @@ -811,7 +812,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) * * No more can be submitted until we reset the wedged bit.
*/ - spin_lock(&timelines->lock); + spin_lock_irqsave(&timelines->lock, flags); list_for_each_entry(tl, &timelines->active_list, link) { struct i915_request *rq; @@ -819,7 +820,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) if (!rq) continue; - spin_unlock(&timelines->lock); + spin_unlock_irqrestore(&timelines->lock, flags); /* * All internal dependencies (i915_requests) will have @@ -832,10 +833,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) i915_request_put(rq); /* Restart iteration after droping lock */ - spin_lock(&timelines->lock); + spin_lock_irqsave(&timelines->lock, flags); tl = list_entry(&timelines->active_list, typeof(*tl), link); } - spin_unlock(&timelines->lock); + spin_unlock_irqrestore(&timelines->lock, flags); intel_gt_sanitize(gt, false); diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 02fbe11b671b..9cb01d9828f1 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -337,6 +337,7 @@ int intel_timeline_pin(struct intel_timeline *tl) void intel_timeline_enter(struct intel_timeline *tl) { struct intel_gt_timelines *timelines = &tl->gt->timelines; + unsigned long flags; lockdep_assert_held(&tl->mutex); @@ -345,14 +346,15 @@ void intel_timeline_enter(struct intel_timeline *tl) return; GEM_BUG_ON(!tl->active_count); /* overflow? 
*/ - spin_lock(&timelines->lock); + spin_lock_irqsave(&timelines->lock, flags); list_add(&tl->link, &timelines->active_list); - spin_unlock(&timelines->lock); + spin_unlock_irqrestore(&timelines->lock, flags); } void intel_timeline_exit(struct intel_timeline *tl) { struct intel_gt_timelines *timelines = &tl->gt->timelines; + unsigned long flags; lockdep_assert_held(&tl->mutex); @@ -360,9 +362,9 @@ void intel_timeline_exit(struct intel_timeline *tl) if (--tl->active_count) return; - spin_lock(&timelines->lock); + spin_lock_irqsave(&timelines->lock, flags); list_del(&tl->link); - spin_unlock(&timelines->lock); + spin_unlock_irqrestore(&timelines->lock, flags); /* * Since this timeline is idle, all bariers upon which we were waiting diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 1a28e3666951..37f502ca95ad 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -568,6 +568,16 @@ static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) return 0; } +static void update_vreg_in_ctx(struct intel_vgpu_workload *workload) +{ + struct intel_vgpu *vgpu = workload->vgpu; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + u32 ring_base; + + ring_base = dev_priv->engine[workload->ring_id]->mmio_base; + vgpu_vreg_t(vgpu, RING_START(ring_base)) = workload->rb_start; +} + static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; @@ -1016,6 +1026,13 @@ static int workload_thread(void *priv) if (need_force_wake) intel_uncore_forcewake_get(&gvt->dev_priv->uncore, FORCEWAKE_ALL); + /* + * Update the vReg of the vGPU which submitted this + * workload. The vGPU may use these registers for checking + * the context state. The value comes from GPU commands + * in this workload. 
+ */ + update_vreg_in_ctx(workload); ret = dispatch_workload(workload); @@ -1438,9 +1455,6 @@ static int prepare_mm(struct intel_vgpu_workload *workload) #define same_context(a, b) (((a)->context_id == (b)->context_id) && \ ((a)->lrca == (b)->lrca)) -#define get_last_workload(q) \ - (list_empty(q) ? NULL : container_of(q->prev, \ - struct intel_vgpu_workload, list)) /** * intel_vgpu_create_workload - create a vGPU workload * @vgpu: a vGPU @@ -1460,7 +1474,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, { struct intel_vgpu_submission *s = &vgpu->submission; struct list_head *q = workload_q_head(vgpu, ring_id); - struct intel_vgpu_workload *last_workload = get_last_workload(q); + struct intel_vgpu_workload *last_workload = NULL; struct intel_vgpu_workload *workload = NULL; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; u64 ring_context_gpa; @@ -1486,15 +1500,20 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, head &= RB_HEAD_OFF_MASK; tail &= RB_TAIL_OFF_MASK; - if (last_workload && same_context(&last_workload->ctx_desc, desc)) { - gvt_dbg_el("ring id %d cur workload == last\n", ring_id); - gvt_dbg_el("ctx head %x real head %lx\n", head, - last_workload->rb_tail); - /* - * cannot use guest context head pointer here, - * as it might not be updated at this time - */ - head = last_workload->rb_tail; + list_for_each_entry_reverse(last_workload, q, list) { + + if (same_context(&last_workload->ctx_desc, desc)) { + gvt_dbg_el("ring id %d cur workload == last\n", + ring_id); + gvt_dbg_el("ctx head %x real head %lx\n", head, + last_workload->rb_tail); + /* + * cannot use guest context head pointer here, + * as it might not be updated at this time + */ + head = last_workload->rb_tail; + break; + } } gvt_dbg_el("ring id %d begin a new workload\n", ring_id); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1d725e0bba40..772154e4073e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ 
b/drivers/gpu/drm/i915/i915_drv.h @@ -961,6 +961,7 @@ struct i915_frontbuffer_tracking { }; struct i915_virtual_gpu { + struct mutex lock; /* serialises sending of g2v_notify command pkts */ bool active; u32 caps; }; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index eb31b69a316a..95e7c52cf8ed 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -889,12 +889,13 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915) static long wait_for_timelines(struct drm_i915_private *i915, - unsigned int flags, long timeout) + unsigned int wait, long timeout) { struct intel_gt_timelines *timelines = &i915->gt.timelines; struct intel_timeline *tl; + unsigned long flags; - spin_lock(&timelines->lock); + spin_lock_irqsave(&timelines->lock, flags); list_for_each_entry(tl, &timelines->active_list, link) { struct i915_request *rq; @@ -902,7 +903,7 @@ wait_for_timelines(struct drm_i915_private *i915, if (!rq) continue; - spin_unlock(&timelines->lock); + spin_unlock_irqrestore(&timelines->lock, flags); /* * "Race-to-idle". @@ -913,19 +914,19 @@ wait_for_timelines(struct drm_i915_private *i915, * want to complete as quickly as possible to avoid prolonged * stalls, so allow the gpu to boost to maximum clocks. 
*/ - if (flags & I915_WAIT_FOR_IDLE_BOOST) + if (wait & I915_WAIT_FOR_IDLE_BOOST) gen6_rps_boost(rq); - timeout = i915_request_wait(rq, flags, timeout); + timeout = i915_request_wait(rq, wait, timeout); i915_request_put(rq); if (timeout < 0) return timeout; /* restart after reacquiring the lock */ - spin_lock(&timelines->lock); + spin_lock_irqsave(&timelines->lock, flags); tl = list_entry(&timelines->active_list, typeof(*tl), link); } - spin_unlock(&timelines->lock); + spin_unlock_irqrestore(&timelines->lock, flags); return timeout; } @@ -1026,6 +1027,14 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, return ERR_PTR(ret); } + if (vma->fence && !i915_gem_object_is_tiled(obj)) { + mutex_lock(&vma->vm->mutex); + ret = i915_vma_revoke_fence(vma); + mutex_unlock(&vma->vm->mutex); + if (ret) + return ERR_PTR(ret); + } + ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); if (ret) return ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0b81e0b64393..b1a7a8b9b46a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -827,10 +827,9 @@ static void mark_tlbs_dirty(struct i915_ppgtt *ppgtt) ppgtt->pd_dirty_engines = ALL_ENGINES; } -static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) +static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) { - struct i915_address_space *vm = &ppgtt->vm; - struct drm_i915_private *dev_priv = vm->i915; + struct drm_i915_private *dev_priv = ppgtt->vm.i915; enum vgt_g2v_type msg; int i; @@ -839,7 +838,9 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) else atomic_dec(px_used(ppgtt->pd)); - if (i915_vm_is_4lvl(vm)) { + mutex_lock(&dev_priv->vgpu.lock); + + if (i915_vm_is_4lvl(&ppgtt->vm)) { const u64 daddr = px_dma(ppgtt->pd); I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); @@ -859,9 +860,10 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool 
create) VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY); } + /* g2v_notify atomically (via hv trap) consumes the message packet. */ I915_WRITE(vgtif_reg(g2v_notify), msg); - return 0; + mutex_unlock(&dev_priv->vgpu.lock); } /* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f1a0a57fc6fc..a53777dd371c 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1465,9 +1465,10 @@ bool i915_retire_requests(struct drm_i915_private *i915) { struct intel_gt_timelines *timelines = &i915->gt.timelines; struct intel_timeline *tl, *tn; + unsigned long flags; LIST_HEAD(free); - spin_lock(&timelines->lock); + spin_lock_irqsave(&timelines->lock, flags); list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { if (!mutex_trylock(&tl->mutex)) continue; @@ -1475,11 +1476,11 @@ bool i915_retire_requests(struct drm_i915_private *i915) intel_timeline_get(tl); GEM_BUG_ON(!tl->active_count); tl->active_count++; /* pin the list element */ - spin_unlock(&timelines->lock); + spin_unlock_irqrestore(&timelines->lock, flags); retire_requests(tl); - spin_lock(&timelines->lock); + spin_lock_irqsave(&timelines->lock, flags); /* Resume iteration after dropping lock */ list_safe_reset_next(tl, tn, link); @@ -1494,7 +1495,7 @@ bool i915_retire_requests(struct drm_i915_private *i915) list_add(&tl->link, &free); } } - spin_unlock(&timelines->lock); + spin_unlock_irqrestore(&timelines->lock, flags); list_for_each_entry_safe(tl, tn, &free, link) __intel_timeline_free(&tl->kref); diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index 39bebf16edbe..968be26735c5 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -94,6 +94,7 @@ void i915_detect_vgpu(struct drm_i915_private *dev_priv) dev_priv->vgpu.caps = readl(shared_area + vgtif_offset(vgt_caps)); dev_priv->vgpu.active = true; + 
mutex_init(&dev_priv->vgpu.lock); DRM_INFO("Virtual GPU for Intel GVT-g detected.\n"); out: