From 84d4ebdb6c72988a1acddb1856b12427a287149d Mon Sep 17 00:00:00 2001
From: Jani Nikula
Date: Tue, 6 Mar 2018 12:41:55 +0200
Subject: [PATCH 01/10] drm/i915/icl: do not save DDI A/E sharing bit for ICL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We don't want to preserve the DDI A 4 lane bit on ICL.

Fixes: 3d2011cfa41f ("drm/i915/icl: remove port A/E lane sharing limitation.")
Cc: Mahesh Kumar
Cc: Paulo Zanoni
Reviewed-by: Ville Syrjälä
Signed-off-by: Jani Nikula
Link: https://patchwork.freedesktop.org/patch/msgid/20180306104155.3526-1-jani.nikula@intel.com
(cherry picked from commit 1e6aa7e55c28ecd842b8b4599e4273c2429ee061)
Signed-off-by: Joonas Lahtinen
---
 drivers/gpu/drm/i915/intel_ddi.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index ac8fc2a44ac6..dbcf1a0586f9 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -3080,9 +3080,12 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port)
 	intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
 	intel_encoder->cloneable = 0;
 
-	intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
-					  (DDI_BUF_PORT_REVERSAL |
-					   DDI_A_4_LANES);
+	if (INTEL_GEN(dev_priv) >= 11)
+		intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
+			DDI_BUF_PORT_REVERSAL;
+	else
+		intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
+			(DDI_BUF_PORT_REVERSAL | DDI_A_4_LANES);
 
 	intel_dig_port->dp.output_reg = INVALID_MMIO_REG;
 	intel_dig_port->max_lanes = intel_ddi_max_lanes(intel_dig_port);

From 7e9d3a4a1b21fd8a595774697f4047e7505deea6 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Wed, 7 Mar 2018 13:42:25 +0000
Subject: [PATCH 02/10] drm/i915: Wrap engine->schedule in RCU locks for set-wedge protection

Similar to the staging around handling of engine->submit_request, we
need to stop adding to the execlists->queue prior to calling
engine->cancel_requests. cancel_requests will move requests from the
queue onto the timeline, so if we add a request onto the queue after
that point, it will be lost.
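To illustrate the pattern (a minimal sketch with hypothetical names, not
the kernel code itself): callers sample and invoke the callback under
rcu_read_lock(), so set-wedged can clear the pointer and then wait out
any in-flight callers before cancelling:

	/* Caller side: the callback may be cleared by set-wedged. */
	rcu_read_lock();
	if (engine->schedule)
		engine->schedule(rq, prio);
	rcu_read_unlock();

	/* set-wedged side: stop new scheduling, then wait for callers. */
	engine->schedule = NULL;
	synchronize_rcu();	/* no caller can still see the old pointer */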
Fixes: af7a8ffad9c5 ("drm/i915: Use rcu instead of stop_machine in set_wedged")
Signed-off-by: Chris Wilson
Cc: Mika Kuoppala
Reviewed-by: Mika Kuoppala
Link: https://patchwork.freedesktop.org/patch/msgid/20180307134226.25492-5-chris@chris-wilson.co.uk
(cherry picked from commit 47650db02dd52267953df81438c93cf8a0eb0e5e)
Signed-off-by: Joonas Lahtinen
---
 drivers/gpu/drm/i915/i915_gem.c     | 13 +++++++------
 drivers/gpu/drm/i915/i915_request.c |  2 ++
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a5bd07338b46..8d913d833ab9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -471,10 +471,11 @@ static void __fence_set_priority(struct dma_fence *fence, int prio)
 
 	rq = to_request(fence);
 	engine = rq->engine;
-	if (!engine->schedule)
-		return;
 
-	engine->schedule(rq, prio);
+	rcu_read_lock();
+	if (engine->schedule)
+		engine->schedule(rq, prio);
+	rcu_read_unlock();
 }
 
 static void fence_set_priority(struct dma_fence *fence, int prio)
@@ -3214,8 +3215,11 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 	 */
 	for_each_engine(engine, i915, id) {
 		i915_gem_reset_prepare_engine(engine);
+
 		engine->submit_request = nop_submit_request;
+		engine->schedule = NULL;
 	}
+	i915->caps.scheduler = 0;
 
 	/*
 	 * Make sure no one is running the old callback before we proceed with
@@ -3233,11 +3237,8 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 		 * start to complete all requests.
 		 */
 		engine->submit_request = nop_complete_submit_request;
-		engine->schedule = NULL;
 	}
 
-	i915->caps.scheduler = 0;
-
 	/*
 	 * Make sure no request can slip through without getting completed by
 	 * either this call here to intel_engine_init_global_seqno, or the one
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d437beac3969..282f57630cc1 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1081,8 +1081,10 @@ void __i915_request_add(struct i915_request *request, bool flush_caches)
 	 * decide whether to preempt the entire chain so that it is ready to
 	 * run at the earliest possible convenience.
 	 */
+	rcu_read_lock();
 	if (engine->schedule)
 		engine->schedule(request, request->ctx->priority);
+	rcu_read_unlock();
 
 	local_bh_disable();
 	i915_sw_fence_commit(&request->submit);

From 73e2232aa3253d77935112bfc218700f6a2f1000 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Wed, 7 Mar 2018 13:42:26 +0000
Subject: [PATCH 03/10] drm/i915: Only call tasklet_kill() on the first prepare_reset

tasklet_kill() will spin waiting for the current tasklet to be executed.
However, if tasklet_disable() has been called, then the tasklet is never
executed but permanently put back onto the runlist until tasklet_enable()
is called. Ergo, we cannot use tasklet_kill() inside a disable/enable
pair. This is the case when we call set-wedge from inside i915_reset(),
and another request was submitted to us concurrent to the reset.
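To sketch why the fix below guards the kill (illustrative reasoning, not
part of the patch itself): tasklet_disable() increments the tasklet's
count, and a disabled tasklet is put back on the runlist instead of
being run, so tasklet_kill() would spin forever once we are inside a
disable/enable pair:

	/* Only wait for the tasklet while it is still enabled
	 * (count == 0); a disabled tasklet is never executed, so
	 * tasklet_kill() would spin on it indefinitely. */
	if (!atomic_read(&t->count))
		tasklet_kill(t);	/* flush any queued run */
	tasklet_disable(t);		/* block further runs during reset */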
Fixes: 963ddd63c314 ("drm/i915: Suspend submission tasklets around wedging")
Signed-off-by: Chris Wilson
Cc: Mika Kuoppala
Reviewed-by: Mika Kuoppala
Link: https://patchwork.freedesktop.org/patch/msgid/20180307134226.25492-6-chris@chris-wilson.co.uk
(cherry picked from commit 68ad361285a9cc73b259f59adbaafde196c15987)
Signed-off-by: Joonas Lahtinen
---
 drivers/gpu/drm/i915/i915_gem.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8d913d833ab9..0359d6f870b4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2940,8 +2940,16 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
 	 * calling engine->init_hw() and also writing the ELSP.
 	 * Turning off the execlists->tasklet until the reset is over
 	 * prevents the race.
+	 *
+	 * Note that this needs to be a single atomic operation on the
+	 * tasklet (flush existing tasks, prevent new tasks) to prevent
+	 * a race between reset and set-wedged. It is not, so we do the best
+	 * we can atm and make sure we don't lock the machine up in the more
+	 * common case of recursively being called from set-wedged from inside
+	 * i915_reset.
 	 */
-	tasklet_kill(&engine->execlists.tasklet);
+	if (!atomic_read(&engine->execlists.tasklet.count))
+		tasklet_kill(&engine->execlists.tasklet);
 	tasklet_disable(&engine->execlists.tasklet);
 
 	/*

From 22de4e7a531b623962e62ee6d3a39a7e51bdf90e Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin
Date: Wed, 14 Mar 2018 08:05:35 +0000
Subject: [PATCH 04/10] drm/i915/pmu: Work around compiler warnings on some kernel configs

Arnd Bergmann reports:

"""
The conditional spinlock confuses gcc into thinking the 'flags' value
might contain uninitialized data:

drivers/gpu/drm/i915/i915_pmu.c: In function '__i915_pmu_event_read':
arch/x86/include/asm/paravirt_types.h:573:3: error: 'flags' may be used uninitialized in this function [-Werror=maybe-uninitialized]

The code is correct, but it's easy to see how the compiler gets confused
here. This avoids the problem by pulling the lock outside of the function
into its only caller.
"""

On a deeper look, this seems to be caused by the paravirt spinlock
implementation when CONFIG_PARAVIRT_DEBUG is set, which, being
complicated, manages to convince gcc that the locked parameter could be
changed externally (impossible).

Work around it by removing the conditional locking parameters
altogether. (It was never the most elegant code anyway.)

The slight penalty we now pay is an additional irqsave spin lock/unlock
cycle on the event enable path. But since enable is not a fast path,
that is preferable to the alternative solution, which was doing MMIO
under an irqsave spinlock.
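The shape of the change, reduced to a sketch (hypothetical names, not
the actual i915 functions):

	/* Before: conditional locking trips -Wmaybe-uninitialized. */
	static u64 read_sample(struct pmu_state *pmu, bool locked)
	{
		unsigned long flags;
		u64 val;

		if (!locked)
			spin_lock_irqsave(&pmu->lock, flags);
		val = pmu->sample;
		if (!locked)	/* gcc: 'flags' may be used uninitialized */
			spin_unlock_irqrestore(&pmu->lock, flags);
		return val;
	}

	/* After: the callee always takes the lock; the one caller that
	 * used to hold it releases it before calling. */
	static u64 read_sample(struct pmu_state *pmu)
	{
		unsigned long flags;
		u64 val;

		spin_lock_irqsave(&pmu->lock, flags);
		val = pmu->sample;
		spin_unlock_irqrestore(&pmu->lock, flags);
		return val;
	}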
Signed-off-by: Tvrtko Ursulin
Reported-by: Arnd Bergmann
Fixes: 1fe699e30113 ("drm/i915/pmu: Fix sleep under atomic in RC6 readout")
Cc: Tvrtko Ursulin
Cc: Chris Wilson
Cc: Imre Deak
Cc: Jani Nikula
Cc: Joonas Lahtinen
Cc: Rodrigo Vivi
Cc: David Airlie
Cc: intel-gfx@lists.freedesktop.org
Cc: dri-devel@lists.freedesktop.org
Reviewed-by: Chris Wilson
Link: https://patchwork.freedesktop.org/patch/msgid/20180314080535.17490-1-tvrtko.ursulin@linux.intel.com
(cherry picked from commit ad055fb8e010e4ff37f66aeed1d380329bddce67)
Signed-off-by: Joonas Lahtinen
---
 drivers/gpu/drm/i915/i915_pmu.c | 32 +++++++++++++-------------------
 1 file changed, 13 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 964467b03e4d..d8feb9053e0c 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -433,7 +433,7 @@ static u64 __get_rc6(struct drm_i915_private *i915)
 	return val;
 }
 
-static u64 get_rc6(struct drm_i915_private *i915, bool locked)
+static u64 get_rc6(struct drm_i915_private *i915)
 {
 #if IS_ENABLED(CONFIG_PM)
 	unsigned long flags;
@@ -449,8 +449,7 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
 		 * previously.
 		 */
 
-		if (!locked)
-			spin_lock_irqsave(&i915->pmu.lock, flags);
+		spin_lock_irqsave(&i915->pmu.lock, flags);
 
 		if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
 			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
@@ -459,12 +458,10 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
 			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
 		}
 
-		if (!locked)
-			spin_unlock_irqrestore(&i915->pmu.lock, flags);
+		spin_unlock_irqrestore(&i915->pmu.lock, flags);
 	} else {
 		struct pci_dev *pdev = i915->drm.pdev;
 		struct device *kdev = &pdev->dev;
-		unsigned long flags2;
 
 		/*
 		 * We are runtime suspended.
@@ -473,10 +470,8 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
 		 *
 		 * on top of the last known real value, as the approximated RC6
 		 * counter value.
 		 */
-		if (!locked)
-			spin_lock_irqsave(&i915->pmu.lock, flags);
-
-		spin_lock_irqsave(&kdev->power.lock, flags2);
+		spin_lock_irqsave(&i915->pmu.lock, flags);
+		spin_lock(&kdev->power.lock);
 		if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
 			i915->pmu.suspended_jiffies_last =
@@ -486,14 +481,13 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
 			i915->pmu.suspended_jiffies_last;
 		val += jiffies - kdev->power.accounting_timestamp;
 
-		spin_unlock_irqrestore(&kdev->power.lock, flags2);
+		spin_unlock(&kdev->power.lock);
 
 		val = jiffies_to_nsecs(val);
 		val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
 		i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
 
-		if (!locked)
-			spin_unlock_irqrestore(&i915->pmu.lock, flags);
+		spin_unlock_irqrestore(&i915->pmu.lock, flags);
 	}
 
 	return val;
@@ -502,7 +496,7 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
 #endif
 }
 
-static u64 __i915_pmu_event_read(struct perf_event *event, bool locked)
+static u64 __i915_pmu_event_read(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
@@ -540,7 +534,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event, bool locked)
 			val = count_interrupts(i915);
 			break;
 		case I915_PMU_RC6_RESIDENCY:
-			val = get_rc6(i915, locked);
+			val = get_rc6(i915);
 			break;
 		}
 	}
@@ -555,7 +549,7 @@ static void i915_pmu_event_read(struct perf_event *event)
 again:
 	prev = local64_read(&hwc->prev_count);
-	new = __i915_pmu_event_read(event, false);
+	new = __i915_pmu_event_read(event);
 
 	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
 		goto again;
 
@@ -605,14 +599,14 @@ static void i915_pmu_enable(struct perf_event *event)
 		engine->pmu.enable_count[sample]++;
 	}
 
+	spin_unlock_irqrestore(&i915->pmu.lock, flags);
+
 	/*
 	 * Store the current counter value so we can report the correct delta
 	 * for all listeners. Even when the event was already enabled and has
 	 * an existing non-zero value.
 	 */
-	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event, true));
-
-	spin_unlock_irqrestore(&i915->pmu.lock, flags);
+	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
 }
 
 static void i915_pmu_disable(struct perf_event *event)

From b2744f862cf63c557be92ee466c6bcfddc230323 Mon Sep 17 00:00:00 2001
From: Colin Xu
Date: Sat, 17 Mar 2018 15:31:02 +0800
Subject: [PATCH 05/10] drm/i915/gvt: Remove redundant printing of untracked mmio

Redundant messages are printed during:
- Linux guest creation.
- dma-buf Win10 guest boot.
- Xonotic stress testing in a Linux guest.

Add the below registers to the default MMIO handler:
0xd00, RPM_CONFIG0
0xd40, RC6_LOCATION
0x65010, HSW_AUD_MISC_CTRL
0x6671c,
0x700a0, CUR_FBC_CTL
0x7239c,

v2:
- Should init i915_reg_t using uint32_t instead of _MMIO macro.
  (compiling errors)
- Use defined offset in i915_reg.h (zhenyu)

Signed-off-by: Colin Xu
Signed-off-by: Zhenyu Wang
---
 drivers/gpu/drm/i915/gvt/handlers.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 112f2ec7c25f..8c5d5d005854 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1767,6 +1767,10 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(CURBASE(PIPE_B), D_ALL);
 	MMIO_D(CURBASE(PIPE_C), D_ALL);
 
+	MMIO_D(CUR_FBC_CTL(PIPE_A), D_ALL);
+	MMIO_D(CUR_FBC_CTL(PIPE_B), D_ALL);
+	MMIO_D(CUR_FBC_CTL(PIPE_C), D_ALL);
+
 	MMIO_D(_MMIO(0x700ac), D_ALL);
 	MMIO_D(_MMIO(0x710ac), D_ALL);
 	MMIO_D(_MMIO(0x720ac), D_ALL);
@@ -2228,6 +2232,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 
 	MMIO_D(HSW_AUD_CFG(PIPE_A), D_ALL);
 	MMIO_D(HSW_AUD_PIN_ELD_CP_VLD, D_ALL);
+	MMIO_D(HSW_AUD_MISC_CTRL(PIPE_A), D_ALL);
 
 	MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_A), D_ALL, NULL, NULL);
 	MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_B), D_ALL, NULL, NULL);
@@ -2559,6 +2564,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(WM_MISC, D_BDW);
 	MMIO_D(_MMIO(BDW_EDP_PSR_BASE), D_BDW);
 
+	MMIO_D(_MMIO(0x6671c), D_BDW_PLUS);
 	MMIO_D(_MMIO(0x66c00), D_BDW_PLUS);
 	MMIO_D(_MMIO(0x66c04), D_BDW_PLUS);
 
@@ -2787,6 +2793,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(_MMIO(0x70380), D_SKL_PLUS);
 	MMIO_D(_MMIO(0x71380), D_SKL_PLUS);
 	MMIO_D(_MMIO(0x72380), D_SKL_PLUS);
+	MMIO_D(_MMIO(0x7239c), D_SKL_PLUS);
 	MMIO_D(_MMIO(0x7039c), D_SKL_PLUS);
 
 	MMIO_D(_MMIO(0x8f074), D_SKL | D_KBL);
@@ -2801,7 +2808,9 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 	MMIO_F(_MMIO(0xc800), 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL);
 	MMIO_F(_MMIO(0xb020), 0x80, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL);
 
+	MMIO_D(RPM_CONFIG0, D_SKL_PLUS);
 	MMIO_D(_MMIO(0xd08), D_SKL_PLUS);
+	MMIO_D(RC6_LOCATION, D_SKL_PLUS);
 	MMIO_DFH(_MMIO(0x20e0), D_SKL_PLUS, F_MODE_MASK, NULL, NULL);
 	MMIO_DFH(_MMIO(0x20ec), D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 

From 84f69ba081b8d8358f725f2a7fd12c1b6f43d445 Mon Sep 17 00:00:00 2001
From: Colin Ian King
Date: Mon, 12 Mar 2018 12:43:58 +0100
Subject: [PATCH 06/10] drm/i915/gvt: fix spelling mistake: "destoried" -> "destroyed"

Trivial fix to spelling mistake in gvt_err error message text.

Signed-off-by: Colin Ian King
Signed-off-by: Zhenyu Wang
---
 drivers/gpu/drm/i915/gvt/gtt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 0a100a288e6d..8eb76becd676 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -2046,7 +2046,7 @@ static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
 	}
 
 	if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
-		gvt_err("vgpu ppgtt mm is not fully destoried\n");
+		gvt_err("vgpu ppgtt mm is not fully destroyed\n");
 
 	if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) {
 		gvt_err("Why we still has spt not freed?\n");

From 5cd4223ea3bd29e4a17d0bd8f2adbe50e50b2e57 Mon Sep 17 00:00:00 2001
From: Changbin Du
Date: Mon, 12 Mar 2018 15:12:34 +0800
Subject: [PATCH 07/10] drm/i915/kvmgt: Handle kzalloc failure

Out-of-memory error must be handled correctly.
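The fix below follows the usual kernel goto-unwind idiom; a reduced
sketch with hypothetical names:

	mutex_lock(&cache_lock);
	ret = map_page(vgpu, gfn, &dma_addr);
	if (ret)
		goto err_unlock;
	ret = cache_add(vgpu, gfn, dma_addr);	/* can now fail with -ENOMEM */
	if (ret)
		goto err_unmap;
	mutex_unlock(&cache_lock);
	return 0;

err_unmap:
	unmap_page(vgpu, gfn, dma_addr);	/* undo exactly the prior step */
err_unlock:
	mutex_unlock(&cache_lock);
	return ret;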
Signed-off-by: Changbin Du
Signed-off-by: Zhenyu Wang
---
 drivers/gpu/drm/i915/gvt/kvmgt.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 8a428678e4b5..520fe3d0a882 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -184,7 +184,7 @@ static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn)
 	return NULL;
 }
 
-static void __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
+static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
 		dma_addr_t dma_addr)
 {
 	struct gvt_dma *new, *itr;
@@ -192,7 +192,7 @@ static void __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
 
 	new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
 	if (!new)
-		return;
+		return -ENOMEM;
 
 	new->vgpu = vgpu;
 	new->gfn = gfn;
@@ -229,6 +229,7 @@ static void __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
 
 	rb_insert_color(&new->dma_addr_node, &vgpu->vdev.dma_addr_cache);
 	vgpu->vdev.nr_cache_entries++;
+	return 0;
 }
 
 static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
@@ -1586,11 +1587,12 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
 	entry = __gvt_cache_find_gfn(info->vgpu, gfn);
 	if (!entry) {
 		ret = gvt_dma_map_page(vgpu, gfn, dma_addr);
-		if (ret) {
-			mutex_unlock(&info->vgpu->vdev.cache_lock);
-			return ret;
-		}
-		__gvt_cache_add(info->vgpu, gfn, *dma_addr);
+		if (ret)
+			goto err_unlock;
+
+		ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr);
+		if (ret)
+			goto err_unmap;
 	} else {
 		kref_get(&entry->ref);
 		*dma_addr = entry->dma_addr;
@@ -1598,6 +1600,12 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
 	mutex_unlock(&info->vgpu->vdev.cache_lock);
 
 	return 0;
+
+err_unmap:
+	gvt_dma_unmap_page(vgpu, gfn, *dma_addr);
+err_unlock:
+	mutex_unlock(&info->vgpu->vdev.cache_lock);
+	return ret;
 }
 
 static void __gvt_dma_release(struct kref *ref)

From 730c8ead53bf3011d33de69ff5a6cebf51e697b5 Mon Sep 17 00:00:00 2001
From: Zhi Wang
Date: Wed, 7 Feb 2018 18:12:14 +0800
Subject: [PATCH 08/10] drm/i915/gvt: Invalidate vGPU PPGTT mm objects during a vGPU reset.

Different OSes might handle GVT PPGTT creation/destroy notifications
differently during a vGPU reset, so a better approach is to invalidate
all vGPU PPGTT mm objects during a vGPU reset.

Signed-off-by: Zhi Wang
Signed-off-by: Zhenyu Wang
---
 drivers/gpu/drm/i915/gvt/gtt.c  | 22 ++++++++++++++++++++++
 drivers/gpu/drm/i915/gvt/gtt.h  |  1 +
 drivers/gpu/drm/i915/gvt/vgpu.c |  1 +
 3 files changed, 24 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 8eb76becd676..d29281231507 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -2290,6 +2290,28 @@ void intel_gvt_clean_gtt(struct intel_gvt *gvt)
 	clean_spt_oos(gvt);
 }
 
+/**
+ * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances
+ * @vgpu: a vGPU
+ *
+ * This function is called to invalidate all PPGTT instances of a vGPU.
+ *
+ */
+void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu)
+{
+	struct list_head *pos, *n;
+	struct intel_vgpu_mm *mm;
+
+	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
+		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
+		if (mm->type == INTEL_GVT_MM_PPGTT) {
+			list_del_init(&mm->ppgtt_mm.lru_list);
+			if (mm->ppgtt_mm.shadowed)
+				invalidate_ppgtt_mm(mm);
+		}
+	}
+}
+
 /**
  * intel_vgpu_reset_ggtt - reset the GGTT entry
  * @vgpu: a vGPU
diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
index e831507e17c3..a8b369cd352b 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.h
+++ b/drivers/gpu/drm/i915/gvt/gtt.h
@@ -194,6 +194,7 @@ struct intel_vgpu_gtt {
 extern int intel_vgpu_init_gtt(struct intel_vgpu *vgpu);
 extern void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu);
 void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu);
+void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu);
 
 extern int intel_gvt_init_gtt(struct intel_gvt *gvt);
 void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu);
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 41f76e86aa1f..2e0a02a80fe4 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -522,6 +522,7 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
 	/* full GPU reset or device model level reset */
 	if (engine_mask == ALL_ENGINES || dmlr) {
 		intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0);
+		intel_vgpu_invalidate_ppgtt(vgpu);
 		/*fence will not be reset during virtual reset */
 		if (dmlr) {
 			intel_vgpu_reset_gtt(vgpu);

From b20c0d5ce1047ba03a6709a07f31f4d7178de35c Mon Sep 17 00:00:00 2001
From: Zhi Wang
Date: Wed, 7 Feb 2018 18:12:15 +0800
Subject: [PATCH 09/10] drm/i915/gvt: Update PDPs after a vGPU mm object is pinned.

The PDPs of a shadow page will only be valid after a vGPU mm is pinned.
So the PDPs in the shadow context should be updated then.
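In outline (a sketch of the new ordering; the pin helper name is
hypothetical, update_shadow_pdps() is from the patch): the shadow mm
must be pinned first, since pinning is what populates shadow_pdps, and
only then are the root pointers copied into the shadow ring context:

	ret = pin_shadow_mm(workload->shadow_mm);	/* populates shadow_pdps */
	if (ret)
		return ret;
	update_shadow_pdps(workload);	/* PDPs are valid only after the pin */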
Signed-off-by: Zhi Wang
Signed-off-by: Zhenyu Wang
---
 drivers/gpu/drm/i915/gvt/scheduler.c | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 9b92b4e25a20..1127bd77fc6e 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -52,6 +52,29 @@ static void set_context_pdp_root_pointer(
 		pdp_pair[i].val = pdp[7 - i];
 }
 
+static void update_shadow_pdps(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu *vgpu = workload->vgpu;
+	int ring_id = workload->ring_id;
+	struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx;
+	struct drm_i915_gem_object *ctx_obj =
+		shadow_ctx->engine[ring_id].state->obj;
+	struct execlist_ring_context *shadow_ring_context;
+	struct page *page;
+
+	if (WARN_ON(!workload->shadow_mm))
+		return;
+
+	if (WARN_ON(!atomic_read(&workload->shadow_mm->pincount)))
+		return;
+
+	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
+	shadow_ring_context = kmap(page);
+	set_context_pdp_root_pointer(shadow_ring_context,
+			(void *)workload->shadow_mm->ppgtt_mm.shadow_pdps);
+	kunmap(page);
+}
+
 static int populate_shadow_context(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
@@ -112,9 +135,6 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 	}
 #undef COPY_REG
 
-	set_context_pdp_root_pointer(shadow_ring_context,
-			(void *)workload->shadow_mm->ppgtt_mm.shadow_pdps);
-
 	intel_gvt_hypervisor_read_gpa(vgpu,
 			workload->ring_context_gpa +
 			sizeof(*shadow_ring_context),
@@ -509,6 +529,8 @@ static int prepare_workload(struct intel_vgpu_workload *workload)
 		return ret;
 	}
 
+	update_shadow_pdps(workload);
+
 	ret = intel_vgpu_sync_oos_pages(workload->vgpu);
 	if (ret) {
 		gvt_vgpu_err("fail to vgpu sync oos pages\n");

From d8303075699292008ae5b2c8fc728d455b994c26 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang
Date: Mon, 19 Mar 2018 17:09:05 +0800
Subject: [PATCH 10/10] drm/i915/gvt: force to set all context control bits from guest

Our shadow context content is copied from the guest, but for masked
control registers such as CTX_CONTEXT_CONTROL we need to make sure all
settings from the guest take effect when this context is on the
hardware. This change forces the mask enable bits on for all bits, so
that every bit setting from the guest is applied on the hardware.

One regression was found related to the inhibit bit: once the inhibit
bit is set, the GPU engine stays in the inhibit state until an
MI_LOAD_REG_IMM command or the context image clears it, with the mask
bit set to 1 and the value bit set to 0. In GVT-g the workload currently
has the highest priority, so a GVT-g workload can easily trigger the
preempt context, which sets the inhibit bit. The GVT-g workload is then
scheduled in, but its shadow context image usually doesn't set the
inhibit mask bit, so the GPU is still in the inhibit state while the
GVT-g workload is running. This caused a GPU hang.
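For context, a worked sketch of the masked-register semantics involved
(illustrative, not code from the patch): the upper 16 bits of such a
register act as a write-enable mask for the lower 16 value bits, so
OR-ing 0xffff << 16 into the saved ctx_ctrl forces every guest value
bit, including a zero inhibit bit, to take effect:

	u32 val = guest_ctx_ctrl;	/* guest may leave mask bits clear */
	val |= 0xffff << 16;		/* enable the write mask for all bits */
	/* Writing 'val' now applies all 16 value bits, so a 0 in the
	 * inhibit bit really clears a previously set inhibit state. */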
Suggested-by: Zhang, Xiong
Signed-off-by: Zhenyu Wang
Reviewed-by: Zhang, Xiong
---
 drivers/gpu/drm/i915/gvt/scheduler.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 1127bd77fc6e..a55b4975c154 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -124,8 +124,14 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 #define COPY_REG(name) \
 	intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
 		+ RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
+#define COPY_REG_MASKED(name) {\
+		intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
+			+ RING_CTX_OFF(name.val),\
+			&shadow_ring_context->name.val, 4);\
+		shadow_ring_context->name.val |= 0xffff << 16;\
+	}
 
-	COPY_REG(ctx_ctrl);
+	COPY_REG_MASKED(ctx_ctrl);
 	COPY_REG(ctx_timestamp);
 
 	if (ring_id == RCS) {
@@ -134,6 +140,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 		COPY_REG(rcs_indirect_ctx_offset);
 	}
 #undef COPY_REG
+#undef COPY_REG_MASKED
 
 	intel_gvt_hypervisor_read_gpa(vgpu,
 			workload->ring_context_gpa +