From 60456d5c2d25cbe91cd0747d12ec9d909bf8d5b9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 Sep 2017 16:34:09 +0100 Subject: [PATCH 001/145] drm/i915/selftests: Replace wmb() with i915_gem_chipset_flush() Currently, we are being fairly lazy and only using a wmb() following an update to an active batch. Previously, we have found that to be insufficient to ensure that a write from the CPU reaches memory in a timely fashion, and in some caches we may need to flush a chipset cache. To that end, we have i915_gem_chipset_flush() so use it. Suggested-by: Mika Kuoppala Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20170926153409.7928-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/selftests/i915_gem_request.c | 10 +++++++--- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 7 +++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 6664cb2eb0b8..78b9f811707f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -418,7 +418,10 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915) err = PTR_ERR(cmd); goto err; } + *cmd = MI_BATCH_BUFFER_END; + i915_gem_chipset_flush(i915); + i915_gem_object_unpin_map(obj); err = i915_gem_object_set_to_gtt_domain(obj, false); @@ -605,8 +608,8 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915) *cmd++ = lower_32_bits(vma->node.start); } *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ + i915_gem_chipset_flush(i915); - wmb(); i915_gem_object_unpin_map(obj); return vma; @@ -625,7 +628,7 @@ static int recursive_batch_resolve(struct i915_vma *batch) return PTR_ERR(cmd); *cmd = MI_BATCH_BUFFER_END; - wmb(); + i915_gem_chipset_flush(batch->vm->i915); i915_gem_object_unpin_map(batch->obj); @@ -858,7 +861,8 @@ out_request: I915_MAP_WC); if (!IS_ERR(cmd)) { *cmd = MI_BATCH_BUFFER_END; - wmb(); + i915_gem_chipset_flush(i915); + i915_gem_object_unpin_map(request[id]->batch->obj); } diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 377c1de766ce..08159b268893 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -165,6 +165,7 @@ static int emit_recurse_batch(struct hang *h, *batch++ = lower_32_bits(vma->node.start); } *batch++ = MI_BATCH_BUFFER_END; /* not reached */ + i915_gem_chipset_flush(h->i915); flags = 0; if (INTEL_GEN(vm->i915) <= 5) @@ -231,7 +232,7 @@ static u32 hws_seqno(const struct hang *h, static void hang_fini(struct hang *h) { *h->batch = MI_BATCH_BUFFER_END; - wmb(); + i915_gem_chipset_flush(h->i915); i915_gem_object_unpin_map(h->obj); i915_gem_object_put(h->obj); @@ -275,6 +276,8 @@ static int igt_hang_sanitycheck(void *arg) i915_gem_request_get(rq); *h.batch = MI_BATCH_BUFFER_END; + i915_gem_chipset_flush(i915); + __i915_add_request(rq, true); timeout = i915_wait_request(rq, @@ -765,7 +768,7 @@ static int igt_reset_queue(void *arg) pr_info("%s: Completed %d resets\n", engine->name, count); *h.batch = MI_BATCH_BUFFER_END; - wmb(); + i915_gem_chipset_flush(i915); i915_gem_request_put(prev); } From 269e6ea95311130f75c4eb21bf1797a4cadfb82d Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Fri, 29 Sep 2017 10:28:36 +0530 Subject: [PATCH 002/145] drm/i915: Move i915_gem_restore_fences to i915_gem_resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i915_gem_restore_fences is GEM resumption task hence it is moved to i915_gem_resume from i915_restore_state. Signed-off-by: Sagar Arun Kamble Cc: Michal Wajdeczko Cc: Michał Winiarski Cc: Chris Wilson Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/1506661116-12106-1-git-send-email-sagar.a.kamble@intel.com Reviewed-by: Michal Wajdeczko Reviewed-by: Joonas Lahtinen Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 1 + drivers/gpu/drm/i915/i915_suspend.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 73eeb6b1f1cd..ab8c6946fea4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4595,6 +4595,7 @@ void i915_gem_resume(struct drm_i915_private *dev_priv) mutex_lock(&dev->struct_mutex); i915_gem_restore_gtt_mappings(dev_priv); + i915_gem_restore_fences(dev_priv); /* As we didn't flush the kernel context before suspend, we cannot * guarantee that the context image is complete. So let's just reset diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 5c86925a0294..8f3aa4dc0c98 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -108,8 +108,6 @@ int i915_restore_state(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); - i915_gem_restore_fences(dev_priv); - if (IS_GEN4(dev_priv)) pci_write_config_word(pdev, GCDGMBUS, dev_priv->regfile.saveGCDGMBUS); From 7e4992ac045ccdef9d5a0b4800ee8da43b5a809a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 Sep 2017 20:38:59 +0100 Subject: [PATCH 003/145] drm/i915/execlists: Move request unwinding to a separate function In the future, we will want to unwind requests following a preemption point. This requires the same steps as for unwinding upon a reset, so extract the existing code to a separate function for later use. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20170928193910.17988-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 54 ++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 61cac26a8b05..cbac2fff8e4d 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -210,6 +210,7 @@ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ #define WA_TAIL_DWORDS 2 +#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS) static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); @@ -348,6 +349,37 @@ find_priolist: return ptr_pack_bits(p, first, 1); } +static void unwind_wa_tail(struct drm_i915_gem_request *rq) +{ + rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); + assert_ring_tail_valid(rq->ring, rq->tail); +} + +static void unwind_incomplete_requests(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_request *rq, *rn; + + lockdep_assert_held(&engine->timeline->lock); + + list_for_each_entry_safe_reverse(rq, rn, + &engine->timeline->requests, + link) { + struct i915_priolist *p; + + if (i915_gem_request_completed(rq)) + return; + + __i915_gem_request_unsubmit(rq); + unwind_wa_tail(rq); + + p = lookup_priolist(engine, + &rq->priotree, + rq->priotree.priority); + list_add(&rq->priotree.link, + &ptr_mask_bits(p, 1)->requests); + } +} + static inline void execlists_context_status_change(struct drm_i915_gem_request *rq, unsigned long status) @@ -1382,7 +1414,6 @@ static void reset_common_ring(struct intel_engine_cs *engine, struct drm_i915_gem_request *request) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct drm_i915_gem_request *rq, *rn; struct intel_context *ce; unsigned long flags; @@ -1400,21 +1431,7 @@ static void reset_common_ring(struct intel_engine_cs *engine, execlist_cancel_port_requests(execlists); /* Push back any incomplete requests for replay after the reset. */ - list_for_each_entry_safe_reverse(rq, rn, - &engine->timeline->requests, link) { - struct i915_priolist *p; - - if (i915_gem_request_completed(rq)) - break; - - __i915_gem_request_unsubmit(rq); - - p = lookup_priolist(engine, - &rq->priotree, - rq->priotree.priority); - list_add(&rq->priotree.link, - &ptr_mask_bits(p, 1)->requests); - } + unwind_incomplete_requests(engine); spin_unlock_irqrestore(&engine->timeline->lock, flags); @@ -1451,10 +1468,7 @@ static void reset_common_ring(struct intel_engine_cs *engine, intel_ring_update_space(request->ring); /* Reset WaIdleLiteRestore:bdw,skl as well */ - request->tail = - intel_ring_wrap(request->ring, - request->wa_tail - WA_TAIL_DWORDS*sizeof(u32)); - assert_ring_tail_valid(request->ring, request->tail); + unwind_wa_tail(request); } static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) From 7d1ea609f67a7d17c287ffdc008c79e0e91bc581 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 Sep 2017 20:39:00 +0100 Subject: [PATCH 004/145] drm/i915: Give the invalid priority a magic name We use INT_MIN to denote the priority of a request that has not been submitted to the scheduler; we treat INT_MIN as an invalid priority and initialise the request to it. Give the value a name so it stands out. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20170928193910.17988-3-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala Reviewed-by: Joonas Lahtinen signalers_list); INIT_LIST_HEAD(&pt->waiters_list); INIT_LIST_HEAD(&pt->link); - pt->priority = INT_MIN; + pt->priority = I915_PRIORITY_INVALID; } static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 96eb52471dad..6b9e992d01de 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -72,6 +72,7 @@ struct i915_priotree { #define I915_PRIORITY_MAX 1024 #define I915_PRIORITY_NORMAL 0 #define I915_PRIORITY_MIN (-I915_PRIORITY_MAX) +#define I915_PRIORITY_INVALID INT_MIN }; struct i915_gem_capture_list { diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index cbac2fff8e4d..303bb2c0b3ce 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -863,6 +863,8 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) struct i915_dependency stack; LIST_HEAD(dfs); + GEM_BUG_ON(prio == I915_PRIORITY_INVALID); + if (prio <= READ_ONCE(request->priotree.priority)) return; @@ -911,7 +913,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) * execlists_submit_request()), we can set our own priority and skip * acquiring the engine locks. */ - if (request->priotree.priority == INT_MIN) { + if (request->priotree.priority == I915_PRIORITY_INVALID) { GEM_BUG_ON(!list_empty(&request->priotree.link)); request->priotree.priority = prio; if (stack.dfs_link.next == stack.dfs_link.prev) From 097a94815fb61ba4f184b3efd8f4e158116956d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Thu, 28 Sep 2017 20:39:01 +0100 Subject: [PATCH 005/145] drm/i915/execlists: Cache the last priolist lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid the repeated rbtree lookup for each request as we unwind them by tracking the last priolist. v2: Fix up my unhelpful suggestion of using default_priolist. Signed-off-by: Michał Winiarski Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20170928193910.17988-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 303bb2c0b3ce..7d6da130b184 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -358,25 +358,31 @@ static void unwind_wa_tail(struct drm_i915_gem_request *rq) static void unwind_incomplete_requests(struct intel_engine_cs *engine) { struct drm_i915_gem_request *rq, *rn; + struct i915_priolist *uninitialized_var(p); + int last_prio = I915_PRIORITY_INVALID; lockdep_assert_held(&engine->timeline->lock); list_for_each_entry_safe_reverse(rq, rn, &engine->timeline->requests, link) { - struct i915_priolist *p; - if (i915_gem_request_completed(rq)) return; __i915_gem_request_unsubmit(rq); unwind_wa_tail(rq); - p = lookup_priolist(engine, - &rq->priotree, - rq->priotree.priority); - list_add(&rq->priotree.link, - &ptr_mask_bits(p, 1)->requests); + GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID); + if (rq->priotree.priority != last_prio) { + p = lookup_priolist(engine, + &rq->priotree, + rq->priotree.priority); + p = ptr_mask_bits(p, 1); + + last_prio = rq->priotree.priority; + } + + list_add(&rq->priotree.link, &p->requests); } } From dd9f31c7a3887950cbd0d49eb9d43f7a1518a356 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 16 Aug 2017 17:46:07 +0300 Subject: [PATCH 006/145] drm/i915/gen9+: Set same power state before hibernation image save/restore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Atm, on GEN9 big core platforms before saving the hibernation image we uninitialize the display, disabling power wells manually, while before restoring the image we keep things powered (letting HW/DMC power down things as needed). The state mismatch will trigger the following error: DC state mismatch (0x0 -> 0x2) While the restore handler knows how to initialize the display from an unknown state (due to a different loader kernel or not having i915 loaded in the loader kernel) we should still use the same state for consistency before image saving and restoring. Do this by uniniting the display before restoring the image too. Bugzilla: https://bugs.freedesktop.org/attachment.cgi?id=133376 Reported-and-tested-by: Wang Wendy Reported-and-tested-by: Joonas Lahtinen Cc: Wang Wendy Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Ville Syrjala Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20170816144607.9935-1-imre.deak@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 59ac9199b35d..5cc24344c266 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1580,7 +1580,7 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) intel_display_set_init_power(dev_priv, false); - fw_csr = !IS_GEN9_LP(dev_priv) && + fw_csr = !IS_GEN9_LP(dev_priv) && !hibernation && suspend_to_idle(dev_priv) && dev_priv->csr.dmc_payload; /* * In case of firmware assisted context save/restore don't manually @@ -2070,11 +2070,14 @@ static int i915_pm_resume(struct device *kdev) /* freeze: before creating the hibernation_image */ static int i915_pm_freeze(struct device *kdev) { + struct drm_device *dev = &kdev_to_i915(kdev)->drm; int ret; - ret = i915_pm_suspend(kdev); - if (ret) - return ret; + if (dev->switch_power_state != DRM_SWITCH_POWER_OFF) { + ret = i915_drm_suspend(dev); + if (ret) + return ret; + } ret = i915_gem_freeze(kdev_to_i915(kdev)); if (ret) @@ -2085,11 +2088,14 @@ static int i915_pm_freeze(struct device *kdev) static int i915_pm_freeze_late(struct device *kdev) { + struct drm_device *dev = &kdev_to_i915(kdev)->drm; int ret; - ret = i915_pm_suspend_late(kdev); - if (ret) - return ret; + if (dev->switch_power_state != DRM_SWITCH_POWER_OFF) { + ret = i915_drm_suspend_late(dev, true); + if (ret) + return ret; + } ret = i915_gem_freeze_late(kdev_to_i915(kdev)); if (ret) From 9dfe2e3ad375a9ba32a13888873ec4586be01ff7 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 28 Sep 2017 13:06:24 +0300 Subject: [PATCH 007/145] drm/i915/gen8+: Init/reset display interrupts only if i915 IRQs are enabled Only init / reset the display interrupts during power well enabling / disabling if the i915 interrupts are enabled. So far we did the init / reset during driver loading / resuming too, where initialization / enabling of the i915 interrupts happens only at a later point. This didn't cause a problem due to GEN8_MASTER_IRQ_CONTROL being cleared, but triggered gen3_assert_iir_is_zero() in GEN8_IRQ_INIT_NDX(). References: https://bugs.freedesktop.org/show_bug.cgi?id=102988 Cc: Chris Wilson Signed-off-by: Imre Deak Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20170928100624.15533-1-imre.deak@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index efd7827ff181..e5997e818673 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3163,10 +3163,17 @@ void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv, enum pipe pipe; spin_lock_irq(&dev_priv->irq_lock); + + if (!intel_irqs_enabled(dev_priv)) { + spin_unlock_irq(&dev_priv->irq_lock); + return; + } + for_each_pipe_masked(dev_priv, pipe, pipe_mask) GEN8_IRQ_INIT_NDX(DE_PIPE, pipe, dev_priv->de_irq_mask[pipe], ~dev_priv->de_irq_mask[pipe] | extra_ier); + spin_unlock_irq(&dev_priv->irq_lock); } @@ -3176,8 +3183,15 @@ void gen8_irq_power_well_pre_disable(struct drm_i915_private *dev_priv, enum pipe pipe; spin_lock_irq(&dev_priv->irq_lock); + + if (!intel_irqs_enabled(dev_priv)) { + spin_unlock_irq(&dev_priv->irq_lock); + return; + } + for_each_pipe_masked(dev_priv, pipe, pipe_mask) GEN8_IRQ_RESET_NDX(DE_PIPE, pipe); + spin_unlock_irq(&dev_priv->irq_lock); /* make sure we're done processing display irqs */ From 495001c6457124e553ebeec30a42e75367724e4b Mon Sep 17 00:00:00 2001 From: David Weinehall Date: Tue, 8 Aug 2017 13:09:52 +0300 Subject: [PATCH 008/145] drm/i915: Add has_psr-flag to gen9lp While testing Jim Bride's latest batch of PSR patches I noticed that gen9lp doesn't include the has_psr flag, and that our GLK system thus reported PSR as unsupported. This patch simply adds has_psr. Signed-off-by: David Weinehall Acked-by: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20170808100952.26448-1-david.weinehall@linux.intel.com --- drivers/gpu/drm/i915/i915_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index da60866b6628..17823980b40d 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -463,6 +463,7 @@ static const struct intel_device_info intel_skylake_gt4_info __initconst = { .has_ddi = 1, \ .has_fpga_dbg = 1, \ .has_fbc = 1, \ + .has_psr = 1, \ .has_runtime_pm = 1, \ .has_pooled_eu = 0, \ .has_csr = 1, \ From ed69cd40685c949ec9c65701758bbf9e6840240f Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 2 Oct 2017 10:55:57 +0300 Subject: [PATCH 009/145] drm/i915/glk, cnl: Implement WaDisableScalarClockGating On GLK and CNL enabling a pipe with its pipe scaler enabled will result in a FIFO underrun. This happens only once after driver loading or system/runtime resume, more specifically after power well 1 gets enabled; subsequent modesets seem to be free of underruns. The BSpec workaround for this is to disable the pipe scaler clock gating for the duration of modeset. Based on my tests disabling clock gating must be done before enabling pipe scaling and we can re-enable it after the pipe is enabled and one vblank has passed. For consistency I also checked if plane scaling would cause the same problem, but that doesn't seem to trigger this problem. The patch is based on an earlier version from Ander. v2 (Rodrigo): - Set also CLKGATE_DIS_PSL bits 8 and 9. - Add also the BSpec workaround ID. Cc: Ander Conselvan de Oliveira Cc: Rodrigo Vivi Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100302 Signed-off-by: Imre Deak Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171002075557.32615-1-imre.deak@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 10 ++++++++++ drivers/gpu/drm/i915/intel_display.c | 26 ++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ee0d4f14ac98..39ad9327e2a0 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3819,6 +3819,16 @@ enum { #define PWM2_GATING_DIS (1 << 14) #define PWM1_GATING_DIS (1 << 13) +#define _CLKGATE_DIS_PSL_A 0x46520 +#define _CLKGATE_DIS_PSL_B 0x46524 +#define _CLKGATE_DIS_PSL_C 0x46528 +#define DPF_GATING_DIS (1 << 10) +#define DPF_RAM_GATING_DIS (1 << 9) +#define DPFR_GATING_DIS (1 << 8) + +#define CLKGATE_DIS_PSL(pipe) \ + _MMIO_PIPE(pipe, _CLKGATE_DIS_PSL_A, _CLKGATE_DIS_PSL_B) + /* * GEN10 clock gating regs */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c4b224a3a0ee..b7a6ddc6a66d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5459,6 +5459,20 @@ static bool hsw_crtc_supports_ips(struct intel_crtc *crtc) return HAS_IPS(to_i915(crtc->base.dev)) && crtc->pipe == PIPE_A; } +static void glk_pipe_scaler_clock_gating_wa(struct drm_i915_private *dev_priv, + enum pipe pipe, bool apply) +{ + u32 val = I915_READ(CLKGATE_DIS_PSL(pipe)); + u32 mask = DPF_GATING_DIS | DPF_RAM_GATING_DIS | DPFR_GATING_DIS; + + if (apply) + val |= mask; + else + val &= ~mask; + + I915_WRITE(CLKGATE_DIS_PSL(pipe), val); +} + static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, struct drm_atomic_state *old_state) { @@ -5469,6 +5483,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; struct intel_atomic_state *old_intel_state = to_intel_atomic_state(old_state); + bool psl_clkgate_wa; if (WARN_ON(intel_crtc->active)) return; @@ -5522,6 +5537,12 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, if (!transcoder_is_dsi(cpu_transcoder)) intel_ddi_enable_pipe_clock(pipe_config); + /* Display WA #1180: WaDisableScalarClockGating: glk, cnl */ + psl_clkgate_wa = (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) && + intel_crtc->config->pch_pfit.enabled; + if (psl_clkgate_wa) + glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, true); + if (INTEL_GEN(dev_priv) >= 9) skylake_pfit_enable(intel_crtc); else @@ -5555,6 +5576,11 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_encoders_enable(crtc, pipe_config, old_state); + if (psl_clkgate_wa) { + intel_wait_for_vblank(dev_priv, pipe); + glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, false); + } + if (intel_crtc->config->has_pch_encoder) { intel_wait_for_vblank(dev_priv, pipe); intel_wait_for_vblank(dev_priv, pipe); From e19c1eb885ac4186e64c7e484424124f3145318e Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 2 Oct 2017 16:53:07 +0300 Subject: [PATCH 010/145] drm/i915: Fix DDI PHY init if it was already on The common lane power down flag of a DPIO PHY has a funky semantic: after the initial enabling of the PHY (so from a disabled state) this flag will be clear. It will be set only after the PHY will be used for the first time (for instance due to enabling the corresponding pipe) and then become unused (due to disabling the pipe). During the initial PHY enablement we don't know which of the above phases we are in, so move the check for the flag where this is known, the HW readout code. This is where the rest of lane power down status checks are done anyway. This fixes at least a problem on GLK where after module reloading, the common lane power down flag of PHY1 is set, but the PHY is actually powered-on and properly set up. The GRC readout code for other PHYs will hence think that PHY1 is not powered initially and disable it after the GRC readout. This will cause the AUX power well related to PHY1 to get disabled in a stuck state, timing out when we try to enable it later. Cc: Ville Syrjala Fixes: e93da0a0137b ("drm/i915/bxt: Sanitiy check the PHY lane power down status") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102777 Signed-off-by: Imre Deak Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171002135307.26117-1-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 3 ++- drivers/gpu/drm/i915/intel_dpio_phy.c | 20 -------------------- 2 files changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 93cbbcbbc193..65f4b6786791 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1713,7 +1713,8 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, out: if (ret && IS_GEN9_LP(dev_priv)) { tmp = I915_READ(BXT_PHY_CTL(port)); - if ((tmp & (BXT_PHY_LANE_POWERDOWN_ACK | + if ((tmp & (BXT_PHY_CMNLANE_POWERDOWN_ACK | + BXT_PHY_LANE_POWERDOWN_ACK | BXT_PHY_LANE_ENABLED)) != BXT_PHY_LANE_ENABLED) DRM_ERROR("Port %c enabled but PHY powered down? " "(PHY_CTL %08x)\n", port_name(port), tmp); diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index 09b670929786..de38d014ed39 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -208,12 +208,6 @@ static const struct bxt_ddi_phy_info glk_ddi_phy_info[] = { }, }; -static u32 bxt_phy_port_mask(const struct bxt_ddi_phy_info *phy_info) -{ - return (phy_info->dual_channel * BIT(phy_info->channel[DPIO_CH1].port)) | - BIT(phy_info->channel[DPIO_CH0].port); -} - static const struct bxt_ddi_phy_info * bxt_get_phy_list(struct drm_i915_private *dev_priv, int *count) { @@ -313,7 +307,6 @@ bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, enum dpio_phy phy) { const struct bxt_ddi_phy_info *phy_info; - enum port port; phy_info = bxt_get_phy_info(dev_priv, phy); @@ -335,19 +328,6 @@ bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, return false; } - for_each_port_masked(port, bxt_phy_port_mask(phy_info)) { - u32 tmp = I915_READ(BXT_PHY_CTL(port)); - - if (tmp & BXT_PHY_CMNLANE_POWERDOWN_ACK) { - DRM_DEBUG_DRIVER("DDI PHY %d powered, but common lane " - "for port %c powered down " - "(PHY_CTL %08x)\n", - phy, port_name(port), tmp); - - return false; - } - } - return true; } From 4d6ef0da415124ebce578b1326f4e511e35d95f2 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 2 Oct 2017 23:36:50 -0700 Subject: [PATCH 011/145] drm/i915/skl: Fix has_ipc on skl and document WaDisableIPC. According to Spec for SKL+: "Isochronous Priority Control. If enabled, Display sends demoted requests once the transition watermark is reached. If transition watermark is not enabled, Display sends demoted requests when the display buffer is full." The commit 'e57f1c02155f ("drm/i915/gen9+: Add has_ipc flag in device info structure")' introduced that as gen9+ but missing many SKL Skus. I believe the reason for that is Spec also mentions workarounds for SKL-ALL: "IPC (Isoch Priority Control) may cause underflows WA: Do not enable IPC in register ARB_CTL2" It seems lame to add the feature and forever disable it, but it will avoid a mistake of enabling it when we are reorganizing the feature definitions on i915_pci.c later. It will also allow us to probably extend that workaround for other platforms. Cc: Mahesh Kumar Cc: Maarten Lankhorst Cc: Chris Wilson Signed-off-by: Rodrigo Vivi Reviewed-by: Mahesh Kumar Link: https://patchwork.freedesktop.org/patch/msgid/20171003063652.17248-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/intel_pm.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 17823980b40d..66f4ab0cb2e8 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -426,6 +426,7 @@ static const struct intel_device_info intel_cherryview_info __initconst = { .platform = INTEL_SKYLAKE, \ .has_csr = 1, \ .has_guc = 1, \ + .has_ipc = 1, \ .ddb_size = 896 static const struct intel_device_info intel_skylake_gt1_info __initconst = { diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c66af09e27a7..171b21f6c4ad 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5827,6 +5827,12 @@ void intel_enable_ipc(struct drm_i915_private *dev_priv) { u32 val; + /* Display WA #0477 WaDisableIPC: skl */ + if (IS_SKYLAKE(dev_priv)) { + dev_priv->ipc_enabled = false; + return; + } + val = I915_READ(DISP_ARB_CTL2); if (dev_priv->ipc_enabled) From 42a3ae88500e394a79b27f6ae9705580033f805d Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 2 Oct 2017 23:36:51 -0700 Subject: [PATCH 012/145] drm/i915: Organize GEN features inheritance. As Chris noticed the current organization is confusing and inheritance is not clear. So, let's split it in GEN_FEATURES _PLATFORM where new GEN inherit features from previous gens and Platforms only use gen features plus what ever is specific for that platform and shouldn't be passed on. Cc: Chris Wilson Signed-off-by: Rodrigo Vivi Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171003063652.17248-2-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 48 +++++++++++++++------------------ 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 66f4ab0cb2e8..bfec3a7841d0 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -329,7 +329,7 @@ static const struct intel_device_info intel_valleyview_info __initconst = { CURSOR_OFFSETS }; -#define HSW_FEATURES \ +#define G75_FEATURES \ GEN7_FEATURES, \ .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, \ .has_ddi = 1, \ @@ -341,7 +341,7 @@ static const struct intel_device_info intel_valleyview_info __initconst = { .has_runtime_pm = 1 #define HSW_PLATFORM \ - HSW_FEATURES, \ + G75_FEATURES, \ .platform = INTEL_HASWELL, \ .has_l3_dpf = 1 @@ -360,8 +360,8 @@ static const struct intel_device_info intel_haswell_gt3_info __initconst = { .gt = 3, }; -#define BDW_FEATURES \ - HSW_FEATURES, \ +#define GEN8_FEATURES \ + G75_FEATURES, \ BDW_COLORS, \ .has_logical_ring_contexts = 1, \ .has_full_48bit_ppgtt = 1, \ @@ -369,7 +369,7 @@ static const struct intel_device_info intel_haswell_gt3_info __initconst = { .has_reset_engine = 1 #define BDW_PLATFORM \ - BDW_FEATURES, \ + GEN8_FEATURES, \ .gen = 8, \ .platform = INTEL_BROADWELL @@ -420,15 +420,18 @@ static const struct intel_device_info intel_cherryview_info __initconst = { CHV_COLORS, }; -#define SKL_PLATFORM \ - BDW_FEATURES, \ - .gen = 9, \ - .platform = INTEL_SKYLAKE, \ +#define GEN9_FEATURES \ + GEN8_FEATURES, \ .has_csr = 1, \ .has_guc = 1, \ .has_ipc = 1, \ .ddb_size = 896 +#define SKL_PLATFORM \ + GEN9_FEATURES, \ + .gen = 9, \ + .platform = INTEL_SKYLAKE + static const struct intel_device_info intel_skylake_gt1_info __initconst = { SKL_PLATFORM, .gt = 1, @@ -497,13 +500,9 @@ static const struct intel_device_info intel_geminilake_info __initconst = { }; #define KBL_PLATFORM \ - BDW_FEATURES, \ + GEN9_FEATURES, \ .gen = 9, \ - .platform = INTEL_KABYLAKE, \ - .has_csr = 1, \ - .has_guc = 1, \ - .has_ipc = 1, \ - .ddb_size = 896 + .platform = INTEL_KABYLAKE static const struct intel_device_info intel_kabylake_gt1_info __initconst = { KBL_PLATFORM, @@ -522,13 +521,9 @@ static const struct intel_device_info intel_kabylake_gt3_info __initconst = { }; #define CFL_PLATFORM \ - BDW_FEATURES, \ + GEN9_FEATURES, \ .gen = 9, \ - .platform = INTEL_COFFEELAKE, \ - .has_csr = 1, \ - .has_guc = 1, \ - .has_ipc = 1, \ - .ddb_size = 896 + .platform = INTEL_COFFEELAKE static const struct intel_device_info intel_coffeelake_gt1_info __initconst = { CFL_PLATFORM, @@ -546,16 +541,17 @@ static const struct intel_device_info intel_coffeelake_gt3_info __initconst = { .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, }; +#define GEN10_FEATURES \ + GEN9_FEATURES, \ + .ddb_size = 1024, \ + .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } + static const struct intel_device_info intel_cannonlake_gt2_info __initconst = { - BDW_FEATURES, + GEN10_FEATURES, .is_alpha_support = 1, .platform = INTEL_CANNONLAKE, .gen = 10, .gt = 2, - .ddb_size = 1024, - .has_csr = 1, - .has_ipc = 1, - .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } }; /* From 4672770d247806551de4aee6619d4f097eed155a Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 2 Oct 2017 23:36:52 -0700 Subject: [PATCH 013/145] drm/i915: Organize GLK_COLORS. Let's organize this in a way that it gets more obvious when looking to the platform colors and in a easier way to get inherited. v2: Add comma at the end (Jani), when possible. Cc: Jani Nikula Signed-off-by: Rodrigo Vivi Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171003063652.17248-3-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index bfec3a7841d0..d3a6fe5e3c10 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -54,6 +54,8 @@ .color = { .degamma_lut_size = 512, .gamma_lut_size = 512 } #define CHV_COLORS \ .color = { .degamma_lut_size = 65, .gamma_lut_size = 257 } +#define GLK_COLORS \ + .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } /* Keep in gen based order, and chronological order within a gen */ #define GEN2_FEATURES \ @@ -496,7 +498,7 @@ static const struct intel_device_info intel_geminilake_info __initconst = { GEN9_LP_FEATURES, .platform = INTEL_GEMINILAKE, .ddb_size = 1024, - .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } + GLK_COLORS, }; #define KBL_PLATFORM \ @@ -544,7 +546,7 @@ static const struct intel_device_info intel_coffeelake_gt3_info __initconst = { #define GEN10_FEATURES \ GEN9_FEATURES, \ .ddb_size = 1024, \ - .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } + GLK_COLORS static const struct intel_device_info intel_cannonlake_gt2_info __initconst = { GEN10_FEATURES, From 3cf50c63a76177e0bbe0e46e1abe4eb263128ba4 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 19 Sep 2017 14:14:18 +0200 Subject: [PATCH 014/145] drm/i915: Unset legacy_cursor_update early in intel_atomic_commit, v3. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit b44d5c0c105a ("drm/i915: Always wait for flip_done, v2.") removed the call to wait_for_vblanks and replaced it with flip_done. Unfortunately legacy_cursor_update was unset too late, and the replacement call drm_atomic_helper_wait_for_flip_done() was a noop. Make sure that its unset before setup_commit() is called to fix this issue. Changes since v1: - Force vblank wait for watermarks not yet converted to atomic too. (Ville) - Use for_each_new_intel_crtc_in_state. (Ville) Changes since v2: - Move the optimization to a separate commit. (Ville) Signed-off-by: Maarten Lankhorst Fixes: b44d5c0c105a ("drm/i915: Always wait for flip_done, v2.") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102675 Testcase: kms_cursor_crc Cc: Daniel Vetter Cc: Jani Nikula Reported-by: Marta Löfstedt Cc: Marta Löfstedt Tested-by: Marta Löfstedt Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20170919121419.13708-1-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 33 ++++++++++++++-------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b7a6ddc6a66d..e55f68cda755 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12541,21 +12541,10 @@ static int intel_atomic_commit(struct drm_device *dev, struct drm_i915_private *dev_priv = to_i915(dev); int ret = 0; - ret = drm_atomic_helper_setup_commit(state, nonblock); - if (ret) - return ret; - drm_atomic_state_get(state); i915_sw_fence_init(&intel_state->commit_ready, intel_atomic_commit_ready); - ret = intel_atomic_prepare_commit(dev, state); - if (ret) { - DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret); - i915_sw_fence_commit(&intel_state->commit_ready); - return ret; - } - /* * The intel_legacy_cursor_update() fast path takes care * of avoiding the vblank waits for simple cursor @@ -12564,11 +12553,11 @@ static int intel_atomic_commit(struct drm_device *dev, * updates happen during the correct frames. Gen9+ have * double buffered watermarks and so shouldn't need this. * - * Do this after drm_atomic_helper_setup_commit() and - * intel_atomic_prepare_commit() because we still want - * to skip the flip and fb cleanup waits. Although that - * does risk yanking the mapping from under the display - * engine. + * Unset state->legacy_cursor_update before the call to + * drm_atomic_helper_setup_commit() because otherwise + * drm_atomic_helper_wait_for_flip_done() is a noop and + * we get FIFO underruns because we didn't wait + * for vblank. * * FIXME doing watermarks and fb cleanup from a vblank worker * (assuming we had any) would solve these problems. @@ -12576,7 +12565,17 @@ static int intel_atomic_commit(struct drm_device *dev, if (INTEL_GEN(dev_priv) < 9) state->legacy_cursor_update = false; - ret = drm_atomic_helper_swap_state(state, true); + ret = intel_atomic_prepare_commit(dev, state); + if (ret) { + DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret); + i915_sw_fence_commit(&intel_state->commit_ready); + return ret; + } + + ret = drm_atomic_helper_setup_commit(state, nonblock); + if (!ret) + ret = drm_atomic_helper_swap_state(state, true); + if (ret) { i915_sw_fence_commit(&intel_state->commit_ready); From 213f1bd035edc36f8d68c028d1b79675cd167d1d Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 19 Sep 2017 14:14:19 +0200 Subject: [PATCH 015/145] drm/i915: Skip vblank waits for cursor updates when watermarks dont need updating MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In legacy cursor updates we need the extra vblank waits if we update watermarks, and then we cannot skip the vblank for cursors. This is why for < gen9 we disabled the cursor fastpath, but we can skip the wait when post vblank watermarks are untouched. Signed-off-by: Maarten Lankhorst Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20170919121419.13708-2-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e55f68cda755..cdb2e25a577c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12562,8 +12562,16 @@ static int intel_atomic_commit(struct drm_device *dev, * FIXME doing watermarks and fb cleanup from a vblank worker * (assuming we had any) would solve these problems. */ - if (INTEL_GEN(dev_priv) < 9) - state->legacy_cursor_update = false; + if (INTEL_GEN(dev_priv) < 9 && state->legacy_cursor_update) { + struct intel_crtc_state *new_crtc_state; + struct intel_crtc *crtc; + int i; + + for_each_new_intel_crtc_in_state(intel_state, crtc, new_crtc_state, i) + if (new_crtc_state->wm.need_postvbl_update || + new_crtc_state->update_wm_post) + state->legacy_cursor_update = false; + } ret = intel_atomic_prepare_commit(dev, state); if (ret) { From 57522c4c87de20d8f7ad4e142a3a4334066d55ff Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 3 Oct 2017 12:51:58 +0300 Subject: [PATCH 016/145] drm/i915/cnl: Reprogram DMC firmware after S3/S4 resume The DMC firmware program memory is lost after S3/S4 system suspend, so we need to reprogram it during resume. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103070 Fixes: cebfcead63de ("drm/i915/DMC/CNL: Load DMC on CNL") Cc: Anusha Srivatsa Cc: Animesh Manna Cc: Rodrigo Vivi Signed-off-by: Imre Deak Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171003095159.711-1-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_runtime_pm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 7933d1bc6a1c..3791c3f5f56d 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -2809,6 +2809,9 @@ static void cnl_display_core_init(struct drm_i915_private *dev_priv, bool resume /* 6. Enable DBUF */ gen9_dbuf_enable(dev_priv); + + if (resume && dev_priv->csr.dmc_payload) + intel_csr_load_program(dev_priv); } static void cnl_display_core_uninit(struct drm_i915_private *dev_priv) From b7208a3f3e52791571df064fb96025ad48edd1bf Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 3 Oct 2017 12:51:59 +0300 Subject: [PATCH 017/145] drm/i915/glk: Fix DMC/DC state idleness calculation According to BSpec GLK like BXT needs to ignore the idle state of cores before starting the DMC firmware's DC state handler. Fixes: dbb28b5c3d3c ("drm/i915/DMC/GLK: Load DMC on GLK") Cc: Anusha Srivatsa Cc: Rodrigo Vivi Signed-off-by: Imre Deak Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171003095159.711-2-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_csr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index cdfb624eb82d..ea5d5c9645a4 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -216,7 +216,7 @@ static void gen9_set_dc_state_debugmask(struct drm_i915_private *dev_priv) mask = DC_STATE_DEBUG_MASK_MEMORY_UP; - if (IS_BROXTON(dev_priv)) + if (IS_GEN9_LP(dev_priv)) mask |= DC_STATE_DEBUG_MASK_CORES; /* The below bit doesn't need to be cleared ever afterwards */ From 8279aaf59006fb19ca8caa3f0f0b8bfc33a0dd1a Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 4 Oct 2017 11:44:16 +0200 Subject: [PATCH 018/145] drm/i915: Remove use_mmio_flip modparm, v2. This has been unused since commit afa8ce5b3080 ("drm/i915: Nuke legacy flip queueing code"). Changes since v1: - Rebase on top of all the changes to modparams. Cc: Chris Wilson Signed-off-by: Maarten Lankhorst Reviewed-by: Chris Wilson \o/-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20171004094416.31306-1-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/i915_params.c | 3 --- drivers/gpu/drm/i915/i915_params.h | 1 - drivers/gpu/drm/i915/intel_lrc.c | 3 +-- 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 9dff323a83d3..b4faeb6aa2bd 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -146,9 +146,6 @@ i915_param_named(disable_display, bool, 0400, i915_param_named_unsafe(enable_cmd_parser, bool, 0400, "Enable command parsing (true=enabled [default], false=disabled)"); -i915_param_named_unsafe(use_mmio_flip, int, 0600, - "use MMIO flips (-1=never, 0=driver discretion [default], 1=always)"); - i915_param_named(mmio_debug, int, 0600, "Enable the MMIO debug code for the first N failures (default: off). " "This may negatively affect performance."); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 4f3f8d650194..c7292268ed43 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -49,7 +49,6 @@ param(int, guc_log_level, -1) \ param(char *, guc_firmware_path, NULL) \ param(char *, huc_firmware_path, NULL) \ - param(int, use_mmio_flip, 0) \ param(int, mmio_debug, 0) \ param(int, edp_vswing, 0) \ param(int, reset, 2) \ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 7d6da130b184..6ea5ecf08230 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -244,8 +244,7 @@ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enabl return 0; if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) && - USES_PPGTT(dev_priv) && - i915_modparams.use_mmio_flip >= 0) + USES_PPGTT(dev_priv)) return 1; return 0; From 32ced39c1b122679f829cfdac5a679b3a5aefeaf Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 28 Sep 2017 15:40:39 -0700 Subject: [PATCH 019/145] drm/i915: Transform whitelisting WAs into a simple reg write RING_FORCE_TO_NONPRIV registers do not live in the logical context. They are simply global privileged MMIO registers that happen to be powercontext saved and restored (meaning only they can survive RC6). Therefore, there is absolutely no need to save them so that they can be restored everytime we create a new logical context. Suggested-by: Chris Wilson Signed-off-by: Oscar Mateo Cc: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/1506638439-6903-1-git-send-email-oscar.mateo@intel.com Acked-by: Michel Thierry Tested-by: Chris Wilson #bxt Reviewed-by: Mika Kuoppala Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_engine_cs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index a28e2a864cf1..a75f5e889927 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -845,8 +845,8 @@ static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) return -EINVAL; - WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), - i915_mmio_reg_offset(reg)); + I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), + i915_mmio_reg_offset(reg)); wa->hw_whitelist_count[engine->id]++; return 0; From 53221e11c7a0e85004c1a28f74e4e173f098d262 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 4 Oct 2017 13:41:52 +0100 Subject: [PATCH 020/145] drm/i915: Move MMCD_MISC_CTRL from context w/a to standard Looking at gem_workarounds shows us that MMCD_MISC_CTRL is not restored following a suspend-resume cycle. This implies that MMCD_MISC_CTRL is not stored in the context, but is an ordinary register w/a that we need to restore during init_hw. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Oscar Mateo Link: https://patchwork.freedesktop.org/patch/msgid/20171004124153.14142-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_engine_cs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index a75f5e889927..8625feb0939e 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -980,7 +980,11 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) GEN9_PBE_COMPRESSED_HASH_SELECTION); WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); - WA_SET_BIT(MMCD_MISC_CTRL, MMCD_PCLA | MMCD_HOTSPOT_EN); + + I915_WRITE(MMCD_MISC_CTRL, + I915_READ(MMCD_MISC_CTRL) | + MMCD_PCLA | + MMCD_HOTSPOT_EN); } /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ From 8d488bbec73fa36907e1b327e919bcd522b4a57f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 4 Oct 2017 13:41:53 +0100 Subject: [PATCH 021/145] drm/i915: Remove WA_(SET|CLR)_BIT These macros are of dubious merit when coupled with the per-context w/a set. Instead of tweaking the value in the context, they tweak the value based on the mmio at the time of recording; they are almost by definition not per-context! Having removed the last users, remove the macros to avoid temptation in the future. v2: Kill WA_WRITE as well (now also unused). Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Oscar Mateo Link: https://patchwork.freedesktop.org/patch/msgid/20171004124153.14142-2-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_engine_cs.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 8625feb0939e..e804a9b816f1 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -830,11 +830,6 @@ static int wa_add(struct drm_i915_private *dev_priv, #define WA_SET_FIELD_MASKED(addr, mask, value) \ WA_REG(addr, mask, _MASKED_FIELD(mask, value)) -#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask)) -#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask)) - -#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val) - static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, i915_reg_t reg) { From 6c1fa341d60285cf3f2c7ca99b4abe7a32b53ff5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 3 Oct 2017 13:50:54 +0100 Subject: [PATCH 022/145] drm/i915: Fix some tracepoints to capture full 64b The tracepoints need some tlc, in particular we've neglected to update them for the 64b era. v2: Prefix hexadecimal output with 0x. Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171003125055.11370-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_trace.h | 44 +++++++++++++++---------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 92f4c5bb7aa7..cca73636a747 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -345,7 +345,7 @@ TRACE_EVENT(i915_gem_object_create, TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) - __field(u32, size) + __field(u64, size) ), TP_fast_assign( @@ -353,7 +353,7 @@ TRACE_EVENT(i915_gem_object_create, __entry->size = obj->base.size; ), - TP_printk("obj=%p, size=%u", __entry->obj, __entry->size) + TP_printk("obj=%p, size=0x%llx", __entry->obj, __entry->size) ); TRACE_EVENT(i915_gem_shrink, @@ -384,7 +384,7 @@ TRACE_EVENT(i915_vma_bind, __field(struct drm_i915_gem_object *, obj) __field(struct i915_address_space *, vm) __field(u64, offset) - __field(u32, size) + __field(u64, size) __field(unsigned, flags) ), @@ -396,7 +396,7 @@ TRACE_EVENT(i915_vma_bind, __entry->flags = flags; ), - TP_printk("obj=%p, offset=%016llx size=%x%s vm=%p", + TP_printk("obj=%p, offset=0x%016llx size=0x%llx%s vm=%p", __entry->obj, __entry->offset, __entry->size, __entry->flags & PIN_MAPPABLE ? ", mappable" : "", __entry->vm) @@ -410,7 +410,7 @@ TRACE_EVENT(i915_vma_unbind, __field(struct drm_i915_gem_object *, obj) __field(struct i915_address_space *, vm) __field(u64, offset) - __field(u32, size) + __field(u64, size) ), TP_fast_assign( @@ -420,18 +420,18 @@ TRACE_EVENT(i915_vma_unbind, __entry->size = vma->node.size; ), - TP_printk("obj=%p, offset=%016llx size=%x vm=%p", + TP_printk("obj=%p, offset=0x%016llx size=0x%llx vm=%p", __entry->obj, __entry->offset, __entry->size, __entry->vm) ); TRACE_EVENT(i915_gem_object_pwrite, - TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len), + TP_PROTO(struct drm_i915_gem_object *obj, u64 offset, u64 len), TP_ARGS(obj, offset, len), TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) - __field(u32, offset) - __field(u32, len) + __field(u64, offset) + __field(u64, len) ), TP_fast_assign( @@ -440,18 +440,18 @@ TRACE_EVENT(i915_gem_object_pwrite, __entry->len = len; ), - TP_printk("obj=%p, offset=%u, len=%u", + TP_printk("obj=%p, offset=0x%llx, len=0x%llx", __entry->obj, __entry->offset, __entry->len) ); TRACE_EVENT(i915_gem_object_pread, - TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len), + TP_PROTO(struct drm_i915_gem_object *obj, u64 offset, u64 len), TP_ARGS(obj, offset, len), TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) - __field(u32, offset) - __field(u32, len) + __field(u64, offset) + __field(u64, len) ), TP_fast_assign( @@ -460,17 +460,17 @@ TRACE_EVENT(i915_gem_object_pread, __entry->len = len; ), - TP_printk("obj=%p, offset=%u, len=%u", + TP_printk("obj=%p, offset=0x%llx, len=0x%llx", __entry->obj, __entry->offset, __entry->len) ); TRACE_EVENT(i915_gem_object_fault, - TP_PROTO(struct drm_i915_gem_object *obj, u32 index, bool gtt, bool write), + TP_PROTO(struct drm_i915_gem_object *obj, u64 index, bool gtt, bool write), TP_ARGS(obj, index, gtt, write), TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) - __field(u32, index) + __field(u64, index) __field(bool, gtt) __field(bool, write) ), @@ -482,7 +482,7 @@ TRACE_EVENT(i915_gem_object_fault, __entry->write = write; ), - TP_printk("obj=%p, %s index=%u %s", + TP_printk("obj=%p, %s index=%llu %s", __entry->obj, __entry->gtt ? "GTT" : "CPU", __entry->index, @@ -515,14 +515,14 @@ DEFINE_EVENT(i915_gem_object, i915_gem_object_destroy, ); TRACE_EVENT(i915_gem_evict, - TP_PROTO(struct i915_address_space *vm, u32 size, u32 align, unsigned int flags), + TP_PROTO(struct i915_address_space *vm, u64 size, u64 align, unsigned int flags), TP_ARGS(vm, size, align, flags), TP_STRUCT__entry( __field(u32, dev) __field(struct i915_address_space *, vm) - __field(u32, size) - __field(u32, align) + __field(u64, size) + __field(u64, align) __field(unsigned int, flags) ), @@ -534,7 +534,7 @@ TRACE_EVENT(i915_gem_evict, __entry->flags = flags; ), - TP_printk("dev=%d, vm=%p, size=%d, align=%d %s", + TP_printk("dev=%d, vm=%p, size=0x%llx, align=0x%llx %s", __entry->dev, __entry->vm, __entry->size, __entry->align, __entry->flags & PIN_MAPPABLE ? ", mappable" : "") ); @@ -593,7 +593,7 @@ TRACE_EVENT(i915_gem_evict_node, __entry->flags = flags; ), - TP_printk("dev=%d, vm=%p, start=%llx size=%llx, color=%lx, flags=%x", + TP_printk("dev=%d, vm=%p, start=0x%llx size=0x%llx, color=0x%lx, flags=%x", __entry->dev, __entry->vm, __entry->start, __entry->size, __entry->color, __entry->flags) From 65921223942435f993ca56cd0a04e686700766bf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 3 Oct 2017 13:50:55 +0100 Subject: [PATCH 023/145] drm/i915: Remove defunct trace points trace_i915_gem_evict_everything and trace_i915_gem_ring_flush stopped being used when their parent functions were removed. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20171003125055.11370-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_trace.h | 72 ++++++++----------------------- 1 file changed, 17 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index cca73636a747..9cab91ddeb79 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -539,38 +539,6 @@ TRACE_EVENT(i915_gem_evict, __entry->flags & PIN_MAPPABLE ? ", mappable" : "") ); -TRACE_EVENT(i915_gem_evict_everything, - TP_PROTO(struct drm_device *dev), - TP_ARGS(dev), - - TP_STRUCT__entry( - __field(u32, dev) - ), - - TP_fast_assign( - __entry->dev = dev->primary->index; - ), - - TP_printk("dev=%d", __entry->dev) -); - -TRACE_EVENT(i915_gem_evict_vm, - TP_PROTO(struct i915_address_space *vm), - TP_ARGS(vm), - - TP_STRUCT__entry( - __field(u32, dev) - __field(struct i915_address_space *, vm) - ), - - TP_fast_assign( - __entry->dev = vm->i915->drm.primary->index; - __entry->vm = vm; - ), - - TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm) -); - TRACE_EVENT(i915_gem_evict_node, TP_PROTO(struct i915_address_space *vm, struct drm_mm_node *node, unsigned int flags), TP_ARGS(vm, node, flags), @@ -599,6 +567,23 @@ TRACE_EVENT(i915_gem_evict_node, __entry->color, __entry->flags) ); +TRACE_EVENT(i915_gem_evict_vm, + TP_PROTO(struct i915_address_space *vm), + TP_ARGS(vm), + + TP_STRUCT__entry( + __field(u32, dev) + __field(struct i915_address_space *, vm) + ), + + TP_fast_assign( + __entry->dev = vm->i915->drm.primary->index; + __entry->vm = vm; + ), + + TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm) +); + TRACE_EVENT(i915_gem_ring_sync_to, TP_PROTO(struct drm_i915_gem_request *to, struct drm_i915_gem_request *from), @@ -649,29 +634,6 @@ TRACE_EVENT(i915_gem_request_queue, __entry->flags) ); -TRACE_EVENT(i915_gem_ring_flush, - TP_PROTO(struct drm_i915_gem_request *req, u32 invalidate, u32 flush), - TP_ARGS(req, invalidate, flush), - - TP_STRUCT__entry( - __field(u32, dev) - __field(u32, ring) - __field(u32, invalidate) - __field(u32, flush) - ), - - TP_fast_assign( - __entry->dev = req->i915->drm.primary->index; - __entry->ring = req->engine->id; - __entry->invalidate = invalidate; - __entry->flush = flush; - ), - - TP_printk("dev=%u, ring=%x, invalidate=%04x, flush=%04x", - __entry->dev, __entry->ring, - __entry->invalidate, __entry->flush) -); - DECLARE_EVENT_CLASS(i915_gem_request, TP_PROTO(struct drm_i915_gem_request *req), TP_ARGS(req), From de7e095a4b6b9dbc643beb39f28b3022fec28220 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 4 Oct 2017 15:33:22 +0000 Subject: [PATCH 024/145] drm/i915: Make intel_uncore.h header self-contained We're trying to resolve inter-header dependencies. Signed-off-by: Michal Wajdeczko Cc: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171004153327.32608-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/intel_uncore.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 03786f931905..66eae2ce2f29 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -25,6 +25,12 @@ #ifndef __INTEL_UNCORE_H__ #define __INTEL_UNCORE_H__ +#include +#include +#include + +#include "i915_reg.h" + struct drm_i915_private; enum forcewake_domain_id { From c23b4f460eb4a44ffbc69af1269e7e212e193a25 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 4 Oct 2017 15:33:23 +0000 Subject: [PATCH 025/145] drm/i915/uc: Drop unnecessary forward declaration We don't need it here. Signed-off-by: Michal Wajdeczko Cc: Sagar Arun Kamble Cc: Joonas Lahtinen Reviewed-by: Sagar Arun Kamble Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171004153327.32608-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/intel_uc.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 6966349ed737..38ec88016df9 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -30,8 +30,6 @@ #include "intel_guc_ct.h" #include "i915_vma.h" -struct drm_i915_gem_request; - /* * This structure primarily describes the GEM object shared with the GuC. * The specs sometimes refer to this object as a "GuC context", but we use From 1fc556fa346948c337d1b04d809443f65e77e04f Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Wed, 4 Oct 2017 15:33:24 +0000 Subject: [PATCH 026/145] drm/i915/uc: Create intel_uc_init_mmio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds new function intel_uc_init_mmio which will initialize MMIO access related variables prior to uc load/init. v2: Removed unnecessary export of guc_send_init_regs. Created intel_uc_init_mmio that currently wraps guc_init_send_regs. (Michal) v3 (Michal): add kerneldoc (Joonas) Signed-off-by: Sagar Arun Kamble Cc: Michal Wajdeczko Cc: Michał Winiarski Signed-off-by: Michal Wajdeczko Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171004153327.32608-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 2 ++ drivers/gpu/drm/i915/intel_uc.c | 15 +++++++++++++-- drivers/gpu/drm/i915/intel_uc.h | 1 + 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 5cc24344c266..74a456fe487f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1007,6 +1007,8 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv) intel_uncore_init(dev_priv); + intel_uc_init_mmio(dev_priv); + ret = intel_engines_init_mmio(dev_priv); if (ret) goto err_uncore; diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 277477890240..8151a9dc12ea 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -288,6 +288,19 @@ static void guc_init_send_regs(struct intel_guc *guc) guc->send_regs.fw_domains = fw_domains; } +/** + * intel_uc_init_mmio - setup uC MMIO access + * + * @dev_priv: device private + * + * Setup minimal state necessary for MMIO accesses later in the + * initialization sequence. + */ +void intel_uc_init_mmio(struct drm_i915_private *dev_priv) +{ + guc_init_send_regs(&dev_priv->guc); +} + static void guc_capture_load_err_log(struct intel_guc *guc) { if (!guc->log.vma || i915_modparams.guc_log_level < 0) @@ -309,8 +322,6 @@ static int guc_enable_communication(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); - guc_init_send_regs(guc); - if (HAS_GUC_CT(dev_priv)) return intel_guc_enable_ct(guc); diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 38ec88016df9..388528f510c5 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -202,6 +202,7 @@ struct intel_huc { /* intel_uc.c */ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv); void intel_uc_init_early(struct drm_i915_private *dev_priv); +void intel_uc_init_mmio(struct drm_i915_private *dev_priv); void intel_uc_init_fw(struct drm_i915_private *dev_priv); void intel_uc_fini_fw(struct drm_i915_private *dev_priv); int intel_uc_init_hw(struct drm_i915_private *dev_priv); From a16b4313ae56ec4d19d0c846487810ff3811a742 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 4 Oct 2017 15:33:25 +0000 Subject: [PATCH 027/145] drm/i915/uc: Move uC fw helper code into dedicated files This is a prerequisite to unblock next steps. v2: correct include order (Joonas) v3: use common function prefix (Joonas) add kerneldoc (Michal) Signed-off-by: Michal Wajdeczko Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Sagar Arun Kamble Reviewed-by: Sagar Arun Kamble Reviewed-by: Joonas Lahtinen Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171004153327.32608-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/intel_uc.c | 159 +----------------------- drivers/gpu/drm/i915/intel_uc.h | 67 +--------- drivers/gpu/drm/i915/intel_uc_fw.c | 193 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_uc_fw.h | 98 +++++++++++++++ 5 files changed, 297 insertions(+), 221 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_uc_fw.c create mode 100644 drivers/gpu/drm/i915/intel_uc_fw.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 5182e3d5557d..4850f260aead 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -59,6 +59,7 @@ i915-y += i915_cmd_parser.o \ # general-purpose microcontroller (GuC) support i915-y += intel_uc.o \ + intel_uc_fw.o \ intel_guc_ct.o \ intel_guc_log.o \ intel_guc_loader.o \ diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 8151a9dc12ea..25694ddfa72b 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -26,19 +26,6 @@ #include "intel_uc.h" #include -/* Cleans up uC firmware by releasing the firmware GEM obj. - */ -static void __intel_uc_fw_fini(struct intel_uc_fw *uc_fw) -{ - struct drm_i915_gem_object *obj; - - obj = fetch_and_zero(&uc_fw->obj); - if (obj) - i915_gem_object_put(obj); - - uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; -} - /* Reset GuC providing us with fresh state for both GuC and HuC. */ static int __intel_uc_reset_hw(struct drm_i915_private *dev_priv) @@ -112,154 +99,16 @@ void intel_uc_init_early(struct drm_i915_private *dev_priv) guc->notify = gen8_guc_raise_irq; } -static void fetch_uc_fw(struct drm_i915_private *dev_priv, - struct intel_uc_fw *uc_fw) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - struct drm_i915_gem_object *obj; - const struct firmware *fw = NULL; - struct uc_css_header *css; - size_t size; - int err; - - if (!uc_fw->path) - return; - - uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; - - DRM_DEBUG_DRIVER("before requesting firmware: uC fw fetch status %s\n", - intel_uc_fw_status_repr(uc_fw->fetch_status)); - - err = request_firmware(&fw, uc_fw->path, &pdev->dev); - if (err) - goto fail; - if (!fw) - goto fail; - - DRM_DEBUG_DRIVER("fetch uC fw from %s succeeded, fw %p\n", - uc_fw->path, fw); - - /* Check the size of the blob before examining buffer contents */ - if (fw->size < sizeof(struct uc_css_header)) { - DRM_NOTE("Firmware header is missing\n"); - goto fail; - } - - css = (struct uc_css_header *)fw->data; - - /* Firmware bits always start from header */ - uc_fw->header_offset = 0; - uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw - - css->key_size_dw - css->exponent_size_dw) * sizeof(u32); - - if (uc_fw->header_size != sizeof(struct uc_css_header)) { - DRM_NOTE("CSS header definition mismatch\n"); - goto fail; - } - - /* then, uCode */ - uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size; - uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); - - /* now RSA */ - if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { - DRM_NOTE("RSA key size is bad\n"); - goto fail; - } - uc_fw->rsa_offset = uc_fw->ucode_offset + uc_fw->ucode_size; - uc_fw->rsa_size = css->key_size_dw * sizeof(u32); - - /* At least, it should have header, uCode and RSA. Size of all three. */ - size = uc_fw->header_size + uc_fw->ucode_size + uc_fw->rsa_size; - if (fw->size < size) { - DRM_NOTE("Missing firmware components\n"); - goto fail; - } - - /* - * The GuC firmware image has the version number embedded at a - * well-known offset within the firmware blob; note that major / minor - * version are TWO bytes each (i.e. u16), although all pointers and - * offsets are defined in terms of bytes (u8). - */ - switch (uc_fw->type) { - case INTEL_UC_FW_TYPE_GUC: - /* Header and uCode will be loaded to WOPCM. Size of the two. */ - size = uc_fw->header_size + uc_fw->ucode_size; - - /* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). */ - if (size > intel_guc_wopcm_size(dev_priv)) { - DRM_ERROR("Firmware is too large to fit in WOPCM\n"); - goto fail; - } - uc_fw->major_ver_found = css->guc.sw_version >> 16; - uc_fw->minor_ver_found = css->guc.sw_version & 0xFFFF; - break; - - case INTEL_UC_FW_TYPE_HUC: - uc_fw->major_ver_found = css->huc.sw_version >> 16; - uc_fw->minor_ver_found = css->huc.sw_version & 0xFFFF; - break; - - default: - DRM_ERROR("Unknown firmware type %d\n", uc_fw->type); - err = -ENOEXEC; - goto fail; - } - - if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) { - DRM_NOTE("Skipping %s firmware version check\n", - intel_uc_fw_type_repr(uc_fw->type)); - } else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || - uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { - DRM_NOTE("%s firmware version %d.%d, required %d.%d\n", - intel_uc_fw_type_repr(uc_fw->type), - uc_fw->major_ver_found, uc_fw->minor_ver_found, - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); - err = -ENOEXEC; - goto fail; - } - - DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n", - uc_fw->major_ver_found, uc_fw->minor_ver_found, - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); - - obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto fail; - } - - uc_fw->obj = obj; - uc_fw->size = fw->size; - - DRM_DEBUG_DRIVER("uC fw fetch status SUCCESS, obj %p\n", - uc_fw->obj); - - release_firmware(fw); - uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS; - return; - -fail: - DRM_WARN("Failed to fetch valid uC firmware from %s (error %d)\n", - uc_fw->path, err); - DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n", - err, fw, uc_fw->obj); - - release_firmware(fw); /* OK even if fw is NULL */ - uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; -} - void intel_uc_init_fw(struct drm_i915_private *dev_priv) { - fetch_uc_fw(dev_priv, &dev_priv->huc.fw); - fetch_uc_fw(dev_priv, &dev_priv->guc.fw); + intel_uc_fw_fetch(dev_priv, &dev_priv->huc.fw); + intel_uc_fw_fetch(dev_priv, &dev_priv->guc.fw); } void intel_uc_fini_fw(struct drm_i915_private *dev_priv) { - __intel_uc_fw_fini(&dev_priv->guc.fw); - __intel_uc_fw_fini(&dev_priv->huc.fw); + intel_uc_fw_fini(&dev_priv->guc.fw); + intel_uc_fw_fini(&dev_priv->huc.fw); } static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i) diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 388528f510c5..f0d5a3f310b5 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -24,6 +24,7 @@ #ifndef _INTEL_UC_H_ #define _INTEL_UC_H_ +#include "intel_uc_fw.h" #include "intel_guc_fwif.h" #include "i915_guc_reg.h" #include "intel_ringbuffer.h" @@ -70,72 +71,6 @@ struct i915_guc_client { uint64_t submissions[I915_NUM_ENGINES]; }; -enum intel_uc_fw_status { - INTEL_UC_FIRMWARE_FAIL = -1, - INTEL_UC_FIRMWARE_NONE = 0, - INTEL_UC_FIRMWARE_PENDING, - INTEL_UC_FIRMWARE_SUCCESS -}; - -/* User-friendly representation of an enum */ -static inline -const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) -{ - switch (status) { - case INTEL_UC_FIRMWARE_FAIL: - return "FAIL"; - case INTEL_UC_FIRMWARE_NONE: - return "NONE"; - case INTEL_UC_FIRMWARE_PENDING: - return "PENDING"; - case INTEL_UC_FIRMWARE_SUCCESS: - return "SUCCESS"; - } - return ""; -} - -enum intel_uc_fw_type { - INTEL_UC_FW_TYPE_GUC, - INTEL_UC_FW_TYPE_HUC -}; - -/* User-friendly representation of an enum */ -static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) -{ - switch (type) { - case INTEL_UC_FW_TYPE_GUC: - return "GuC"; - case INTEL_UC_FW_TYPE_HUC: - return "HuC"; - } - return "uC"; -} - -/* - * This structure encapsulates all the data needed during the process - * of fetching, caching, and loading the firmware image into the GuC. - */ -struct intel_uc_fw { - const char *path; - size_t size; - struct drm_i915_gem_object *obj; - enum intel_uc_fw_status fetch_status; - enum intel_uc_fw_status load_status; - - uint16_t major_ver_wanted; - uint16_t minor_ver_wanted; - uint16_t major_ver_found; - uint16_t minor_ver_found; - - enum intel_uc_fw_type type; - uint32_t header_size; - uint32_t header_offset; - uint32_t rsa_size; - uint32_t rsa_offset; - uint32_t ucode_size; - uint32_t ucode_offset; -}; - struct intel_guc_log { uint32_t flags; struct i915_vma *vma; diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c new file mode 100644 index 000000000000..766b1cbdfbd7 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_uc_fw.c @@ -0,0 +1,193 @@ +/* + * Copyright © 2016-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include + +#include "intel_uc_fw.h" +#include "i915_drv.h" + +/** + * intel_uc_fw_fetch - fetch uC firmware + * + * @dev_priv: device private + * @uc_fw: uC firmware + * + * Fetch uC firmware into GEM obj. + */ +void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, + struct intel_uc_fw *uc_fw) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + struct drm_i915_gem_object *obj; + const struct firmware *fw = NULL; + struct uc_css_header *css; + size_t size; + int err; + + if (!uc_fw->path) + return; + + uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; + + DRM_DEBUG_DRIVER("before requesting firmware: uC fw fetch status %s\n", + intel_uc_fw_status_repr(uc_fw->fetch_status)); + + err = request_firmware(&fw, uc_fw->path, &pdev->dev); + if (err) + goto fail; + if (!fw) + goto fail; + + DRM_DEBUG_DRIVER("fetch uC fw from %s succeeded, fw %p\n", + uc_fw->path, fw); + + /* Check the size of the blob before examining buffer contents */ + if (fw->size < sizeof(struct uc_css_header)) { + DRM_NOTE("Firmware header is missing\n"); + goto fail; + } + + css = (struct uc_css_header *)fw->data; + + /* Firmware bits always start from header */ + uc_fw->header_offset = 0; + uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw - + css->key_size_dw - css->exponent_size_dw) * + sizeof(u32); + + if (uc_fw->header_size != sizeof(struct uc_css_header)) { + DRM_NOTE("CSS header definition mismatch\n"); + goto fail; + } + + /* then, uCode */ + uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size; + uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); + + /* now RSA */ + if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { + DRM_NOTE("RSA key size is bad\n"); + goto fail; + } + uc_fw->rsa_offset = uc_fw->ucode_offset + uc_fw->ucode_size; + uc_fw->rsa_size = css->key_size_dw * sizeof(u32); + + /* At least, it should have header, uCode and RSA. Size of all three. */ + size = uc_fw->header_size + uc_fw->ucode_size + uc_fw->rsa_size; + if (fw->size < size) { + DRM_NOTE("Missing firmware components\n"); + goto fail; + } + + /* + * The GuC firmware image has the version number embedded at a + * well-known offset within the firmware blob; note that major / minor + * version are TWO bytes each (i.e. u16), although all pointers and + * offsets are defined in terms of bytes (u8). + */ + switch (uc_fw->type) { + case INTEL_UC_FW_TYPE_GUC: + /* Header and uCode will be loaded to WOPCM. Size of the two. */ + size = uc_fw->header_size + uc_fw->ucode_size; + + /* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). */ + if (size > intel_guc_wopcm_size(dev_priv)) { + DRM_ERROR("Firmware is too large to fit in WOPCM\n"); + goto fail; + } + uc_fw->major_ver_found = css->guc.sw_version >> 16; + uc_fw->minor_ver_found = css->guc.sw_version & 0xFFFF; + break; + + case INTEL_UC_FW_TYPE_HUC: + uc_fw->major_ver_found = css->huc.sw_version >> 16; + uc_fw->minor_ver_found = css->huc.sw_version & 0xFFFF; + break; + + default: + DRM_ERROR("Unknown firmware type %d\n", uc_fw->type); + err = -ENOEXEC; + goto fail; + } + + if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) { + DRM_NOTE("Skipping %s firmware version check\n", + intel_uc_fw_type_repr(uc_fw->type)); + } else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || + uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { + DRM_NOTE("%s firmware version %d.%d, required %d.%d\n", + intel_uc_fw_type_repr(uc_fw->type), + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); + err = -ENOEXEC; + goto fail; + } + + DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n", + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); + + obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto fail; + } + + uc_fw->obj = obj; + uc_fw->size = fw->size; + + DRM_DEBUG_DRIVER("uC fw fetch status SUCCESS, obj %p\n", + uc_fw->obj); + + release_firmware(fw); + uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS; + return; + +fail: + DRM_WARN("Failed to fetch valid uC firmware from %s (error %d)\n", + uc_fw->path, err); + DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n", + err, fw, uc_fw->obj); + + release_firmware(fw); /* OK even if fw is NULL */ + uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; +} + +/** + * intel_uc_fw_fini - cleanup uC firmware + * + * @uc_fw: uC firmware + * + * Cleans up uC firmware by releasing the firmware GEM obj. + */ +void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) +{ + struct drm_i915_gem_object *obj; + + obj = fetch_and_zero(&uc_fw->obj); + if (obj) + i915_gem_object_put(obj); + + uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; +} diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h new file mode 100644 index 000000000000..849b2ce685c0 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_uc_fw.h @@ -0,0 +1,98 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_UC_FW_H_ +#define _INTEL_UC_FW_H_ + +struct drm_i915_private; + +enum intel_uc_fw_status { + INTEL_UC_FIRMWARE_FAIL = -1, + INTEL_UC_FIRMWARE_NONE = 0, + INTEL_UC_FIRMWARE_PENDING, + INTEL_UC_FIRMWARE_SUCCESS +}; + +enum intel_uc_fw_type { + INTEL_UC_FW_TYPE_GUC, + INTEL_UC_FW_TYPE_HUC +}; + +/* + * This structure encapsulates all the data needed during the process + * of fetching, caching, and loading the firmware image into the uC. + */ +struct intel_uc_fw { + const char *path; + size_t size; + struct drm_i915_gem_object *obj; + enum intel_uc_fw_status fetch_status; + enum intel_uc_fw_status load_status; + + uint16_t major_ver_wanted; + uint16_t minor_ver_wanted; + uint16_t major_ver_found; + uint16_t minor_ver_found; + + enum intel_uc_fw_type type; + uint32_t header_size; + uint32_t header_offset; + uint32_t rsa_size; + uint32_t rsa_offset; + uint32_t ucode_size; + uint32_t ucode_offset; +}; + +static inline +const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) +{ + switch (status) { + case INTEL_UC_FIRMWARE_FAIL: + return "FAIL"; + case INTEL_UC_FIRMWARE_NONE: + return "NONE"; + case INTEL_UC_FIRMWARE_PENDING: + return "PENDING"; + case INTEL_UC_FIRMWARE_SUCCESS: + return "SUCCESS"; + } + return ""; +} + +static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) +{ + switch (type) { + case INTEL_UC_FW_TYPE_GUC: + return "GuC"; + case INTEL_UC_FW_TYPE_HUC: + return "HuC"; + } + return "uC"; +} + +void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, + struct intel_uc_fw *uc_fw); +void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); + +#endif From d56d63d78c69d880517b5bb72e5ee2be3ead7b18 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 4 Oct 2017 15:33:26 +0000 Subject: [PATCH 028/145] drm/i915/huc: Move HuC declarations into dedicated header We want to keep each uC specific code in separate files. Signed-off-by: Michal Wajdeczko Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Sagar Arun Kamble Reviewed-by: Sagar Arun Kamble Reviewed-by: Joonas Lahtinen Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171004153327.32608-6-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/intel_huc.h | 39 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_uc.h | 13 +---------- 2 files changed, 40 insertions(+), 12 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_huc.h diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h new file mode 100644 index 000000000000..d58422b0c85f --- /dev/null +++ b/drivers/gpu/drm/i915/intel_huc.h @@ -0,0 +1,39 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_HUC_H_ +#define _INTEL_HUC_H_ + +struct intel_huc { + /* Generic uC firmware management */ + struct intel_uc_fw fw; + + /* HuC-specific additions */ +}; + +void intel_huc_select_fw(struct intel_huc *huc); +void intel_huc_init_hw(struct intel_huc *huc); +void intel_huc_auth(struct intel_huc *huc); + +#endif diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index f0d5a3f310b5..4fa091e90b5f 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -30,6 +30,7 @@ #include "intel_ringbuffer.h" #include "intel_guc_ct.h" #include "i915_vma.h" +#include "intel_huc.h" /* * This structure primarily describes the GEM object shared with the GuC. @@ -127,13 +128,6 @@ struct intel_guc { void (*notify)(struct intel_guc *guc); }; -struct intel_huc { - /* Generic uC firmware management */ - struct intel_uc_fw fw; - - /* HuC-specific additions */ -}; - /* intel_uc.c */ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv); void intel_uc_init_early(struct drm_i915_private *dev_priv); @@ -186,9 +180,4 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma) return offset; } -/* intel_huc.c */ -void intel_huc_select_fw(struct intel_huc *huc); -void intel_huc_init_hw(struct intel_huc *huc); -void intel_huc_auth(struct intel_huc *huc); - #endif From 3af7a9c6446b4489d208e501d95c5745d89f6628 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 4 Oct 2017 15:33:27 +0000 Subject: [PATCH 029/145] drm/i915/guc: Move Guc early init into own function We don't want to make aggregate uc functions to be too detailed. This will also make future patch easier. Signed-off-by: Michal Wajdeczko Cc: Sagar Arun Kamble Cc: Joonas Lahtinen Reviewed-by: Sagar Arun Kamble Reviewed-by: Joonas Lahtinen Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171004153327.32608-7-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/intel_uc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 25694ddfa72b..e7875277ba97 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -88,10 +88,8 @@ static void gen8_guc_raise_irq(struct intel_guc *guc) I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); } -void intel_uc_init_early(struct drm_i915_private *dev_priv) +static void guc_init_early(struct intel_guc *guc) { - struct intel_guc *guc = &dev_priv->guc; - intel_guc_ct_init_early(&guc->ct); mutex_init(&guc->send_mutex); @@ -99,6 +97,11 @@ void intel_uc_init_early(struct drm_i915_private *dev_priv) guc->notify = gen8_guc_raise_irq; } +void intel_uc_init_early(struct drm_i915_private *dev_priv) +{ + guc_init_early(&dev_priv->guc); +} + void intel_uc_init_fw(struct drm_i915_private *dev_priv) { intel_uc_fw_fetch(dev_priv, &dev_priv->huc.fw); From 1e998343f95b46497c56a21de1b14a302256f973 Mon Sep 17 00:00:00 2001 From: Jeff McGee Date: Tue, 3 Oct 2017 21:34:45 +0100 Subject: [PATCH 030/145] drm/i915/preempt: Fix WaEnablePreemptionGranularityControlByUMD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The WA applies to all production Gen9 and requires both enabling and whitelisting of the per-context preemption control register. v2: Extend to Cannonlake. Signed-off-by: Jeff McGee Signed-off-by: Michał Winiarski Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171003203453.15692-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index e804a9b816f1..6245970eb3ab 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1075,8 +1075,10 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) if (ret) return ret; - /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl */ - ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); + /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */ + I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, + _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); + ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); if (ret) return ret; @@ -1138,14 +1140,6 @@ static int skl_init_workarounds(struct intel_engine_cs *engine) if (ret) return ret; - /* - * Actual WA is to disable percontext preemption granularity control - * until D0 which is the default case so this is equivalent to - * !WaDisablePerCtxtPreemptionGranularityControl:skl - */ - I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, - _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); - /* WaEnableGapsTsvCreditFix:skl */ I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE)); @@ -1278,6 +1272,8 @@ static int cnl_init_workarounds(struct intel_engine_cs *engine) WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); /* WaEnablePreemptionGranularityControlByUMD:cnl */ + I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, + _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); if (ret) return ret; From 5152defe4a53ad15e6d96c422440152302c8abd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Tue, 3 Oct 2017 21:34:46 +0100 Subject: [PATCH 031/145] drm/i915/preempt: Default to disabled mid-command preemption levels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Supporting fine-granularity preemption levels may require changes in userspace batch buffer programming. Therefore, we need to fallback to safe default values, rather that use hardware defaults. Userspace is still able to enable fine-granularity, since we're whitelisting the register controlling it in WaEnablePreemptionGranularityControlByUMD. v2: Extend w/a to cover Cannonlake v3: Fix commentary to include both fake w/a names. Signed-off-by: Michał Winiarski Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171003203453.15692-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_reg.h | 6 ++++++ drivers/gpu/drm/i915/intel_engine_cs.c | 25 +++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 39ad9327e2a0..e7dba5539b11 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7003,6 +7003,12 @@ enum { #define GEN9_CS_DEBUG_MODE1 _MMIO(0x20ec) #define GEN9_CTX_PREEMPT_REG _MMIO(0x2248) #define GEN8_CS_CHICKEN1 _MMIO(0x2580) +#define GEN9_PREEMPT_3D_OBJECT_LEVEL (1<<0) +#define GEN9_PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1)) +#define GEN9_PREEMPT_GPGPU_MID_THREAD_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(0, 0) +#define GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(0, 1) +#define GEN9_PREEMPT_GPGPU_COMMAND_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(1, 0) +#define GEN9_PREEMPT_GPGPU_LEVEL_MASK GEN9_PREEMPT_GPGPU_LEVEL(1, 1) /* GEN7 chicken */ #define GEN7_COMMON_SLICE_CHICKEN1 _MMIO(0x7010) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 6245970eb3ab..dd64e3d13aa9 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1070,6 +1070,24 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_FLUSH_COHERENT_LINES)); + /* + * Supporting preemption with fine-granularity requires changes in the + * batch buffer programming. Since we can't break old userspace, we + * need to set our default preemption level to safe value. Userspace is + * still able to use more fine-grained preemption levels, since in + * WaEnablePreemptionGranularityControlByUMD we're whitelisting the + * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are + * not real HW workarounds, but merely a way to start using preemption + * while maintaining old contract with userspace. + */ + + /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */ + WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); + + /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */ + WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, + GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); + /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); if (ret) @@ -1271,6 +1289,13 @@ static int cnl_init_workarounds(struct intel_engine_cs *engine) /* FtrEnableFastAnisoL1BankingFix: cnl */ WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); + /* WaDisable3DMidCmdPreemption:cnl */ + WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); + + /* WaDisableGPGPUMidCmdPreemption:cnl */ + WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, + GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); + /* WaEnablePreemptionGranularityControlByUMD:cnl */ I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); From d6c0511300dcff19969844495ba293c4efb50b42 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 3 Oct 2017 21:34:47 +0100 Subject: [PATCH 032/145] drm/i915/execlists: Distinguish the incomplete context notifies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let the listener know that the context we just scheduled out was not complete, and will be scheduled back in at a later point. v2: Handle CONTEXT_STATUS_PREEMPTED in gvt by aliasing it to CONTEXT_STATUS_OUT for the moment, gvt can expand upon the difference later. Signed-off-by: Chris Wilson Cc: "Zhenyu Wang" Cc: "Wang, Zhi A" Cc: Michał Winiarski Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171003203453.15692-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/scheduler.c | 1 + drivers/gpu/drm/i915/intel_lrc.c | 2 +- drivers/gpu/drm/i915/intel_lrc.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index d5892d24f0b6..f6ded475bb2c 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -174,6 +174,7 @@ static int shadow_context_status_change(struct notifier_block *nb, atomic_set(&workload->shadow_ctx_active, 1); break; case INTEL_CONTEXT_SCHEDULE_OUT: + case INTEL_CONTEXT_SCHEDULE_PREEMPTED: atomic_set(&workload->shadow_ctx_active, 0); break; default: diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6ea5ecf08230..a89bc1f148fb 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -617,7 +617,7 @@ execlist_cancel_port_requests(struct intel_engine_execlists *execlists) while (num_ports-- && port_isset(port)) { struct drm_i915_gem_request *rq = port_request(port); - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED); i915_gem_request_put(rq); memset(port, 0, sizeof(*port)); diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 314adee7127a..689fde1a63a9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -61,6 +61,7 @@ enum { INTEL_CONTEXT_SCHEDULE_IN = 0, INTEL_CONTEXT_SCHEDULE_OUT, + INTEL_CONTEXT_SCHEDULE_PREEMPTED, }; /* Logical Rings */ From e7af3116836fb7feb985497f2c7776751fb27ef3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 3 Oct 2017 21:34:48 +0100 Subject: [PATCH 033/145] drm/i915: Introduce a preempt context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add another perma-pinned context for using for preemption at any time. We cannot just reuse the existing kernel context, as first and foremost we need to ensure that we can preempt the kernel context itself, so require a distinct context id. Similar to the kernel context, we may want to interrupt execution and switch to the preempt context at any time, and so it needs to be permanently pinned and available. To compensate for yet another permanent allocation, we shrink the existing context and the new context by reducing their ringbuffer to the minimum. v2: Assert that we never allocate a request from the preemption context. v3: Limit perma-pin to engines that may preempt. v4: Onion cleanup for early driver death v5: Onion ordering in main driver cleanup as well. Signed-off-by: Chris Wilson Reviewed-by: Michał Winiarski Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171003203453.15692-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 6 +- drivers/gpu/drm/i915/i915_gem_context.c | 78 ++++++++++++++++++------- drivers/gpu/drm/i915/i915_gem_request.c | 7 +++ drivers/gpu/drm/i915/intel_engine_cs.c | 22 ++++++- 4 files changed, 88 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7ca11318ac69..31292afb961d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -783,6 +783,7 @@ struct intel_csr { func(has_l3_dpf); \ func(has_llc); \ func(has_logical_ring_contexts); \ + func(has_logical_ring_preemption); \ func(has_overlay); \ func(has_pipe_cxsr); \ func(has_pooled_eu); \ @@ -2251,8 +2252,11 @@ struct drm_i915_private { wait_queue_head_t gmbus_wait_queue; struct pci_dev *bridge_dev; - struct i915_gem_context *kernel_context; struct intel_engine_cs *engine[I915_NUM_ENGINES]; + /* Context used internally to idle the GPU and setup initial state */ + struct i915_gem_context *kernel_context; + /* Context only to be used for injecting preemption commands */ + struct i915_gem_context *preempt_context; struct i915_vma *semaphore; struct drm_dma_handle *status_page_dmah; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 921ee369c74d..2bb8e58706ba 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -416,14 +416,43 @@ out: return ctx; } -int i915_gem_contexts_init(struct drm_i915_private *dev_priv) +static struct i915_gem_context * +create_kernel_context(struct drm_i915_private *i915, int prio) { struct i915_gem_context *ctx; - /* Init should only be called once per module load. Eventually the - * restriction on the context_disabled check can be loosened. */ - if (WARN_ON(dev_priv->kernel_context)) - return 0; + ctx = i915_gem_create_context(i915, NULL); + if (IS_ERR(ctx)) + return ctx; + + i915_gem_context_clear_bannable(ctx); + ctx->priority = prio; + ctx->ring_size = PAGE_SIZE; + + GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); + + return ctx; +} + +static void +destroy_kernel_context(struct i915_gem_context **ctxp) +{ + struct i915_gem_context *ctx; + + /* Keep the context ref so that we can free it immediately ourselves */ + ctx = i915_gem_context_get(fetch_and_zero(ctxp)); + GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); + + context_close(ctx); + i915_gem_context_free(ctx); +} + +int i915_gem_contexts_init(struct drm_i915_private *dev_priv) +{ + struct i915_gem_context *ctx; + int err; + + GEM_BUG_ON(dev_priv->kernel_context); INIT_LIST_HEAD(&dev_priv->contexts.list); INIT_WORK(&dev_priv->contexts.free_work, contexts_free_worker); @@ -441,28 +470,38 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); ida_init(&dev_priv->contexts.hw_ida); - ctx = i915_gem_create_context(dev_priv, NULL); + /* lowest priority; idle task */ + ctx = create_kernel_context(dev_priv, I915_PRIORITY_MIN); if (IS_ERR(ctx)) { - DRM_ERROR("Failed to create default global context (error %ld)\n", - PTR_ERR(ctx)); - return PTR_ERR(ctx); + DRM_ERROR("Failed to create default global context\n"); + err = PTR_ERR(ctx); + goto err; } - - /* For easy recognisablity, we want the kernel context to be 0 and then + /* + * For easy recognisablity, we want the kernel context to be 0 and then * all user contexts will have non-zero hw_id. */ GEM_BUG_ON(ctx->hw_id); - - i915_gem_context_clear_bannable(ctx); - ctx->priority = I915_PRIORITY_MIN; /* lowest priority; idle task */ dev_priv->kernel_context = ctx; - GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); + /* highest priority; preempting task */ + ctx = create_kernel_context(dev_priv, INT_MAX); + if (IS_ERR(ctx)) { + DRM_ERROR("Failed to create default preempt context\n"); + err = PTR_ERR(ctx); + goto err_kernel_context; + } + dev_priv->preempt_context = ctx; DRM_DEBUG_DRIVER("%s context support initialized\n", dev_priv->engine[RCS]->context_size ? "logical" : "fake"); return 0; + +err_kernel_context: + destroy_kernel_context(&dev_priv->kernel_context); +err: + return err; } void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) @@ -507,15 +546,10 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) void i915_gem_contexts_fini(struct drm_i915_private *i915) { - struct i915_gem_context *ctx; - lockdep_assert_held(&i915->drm.struct_mutex); - /* Keep the context so that we can free it immediately ourselves */ - ctx = i915_gem_context_get(fetch_and_zero(&i915->kernel_context)); - GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); - context_close(ctx); - i915_gem_context_free(ctx); + destroy_kernel_context(&i915->preempt_context); + destroy_kernel_context(&i915->kernel_context); /* Must free all deferred contexts (via flush_workqueue) first */ ida_destroy(&i915->contexts.hw_ida); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 14956d899911..b100b38f1dd2 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -587,6 +587,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, lockdep_assert_held(&dev_priv->drm.struct_mutex); + /* + * Preempt contexts are reserved for exclusive use to inject a + * preemption context switch. They are never to be used for any trivial + * request! + */ + GEM_BUG_ON(ctx == dev_priv->preempt_context); + /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report * EIO if the GPU is already wedged. */ diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index dd64e3d13aa9..8b8053d8e24b 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -613,9 +613,22 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (IS_ERR(ring)) return PTR_ERR(ring); + /* + * Similarly the preempt context must always be available so that + * we can interrupt the engine at any time. + */ + if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) { + ring = engine->context_pin(engine, + engine->i915->preempt_context); + if (IS_ERR(ring)) { + ret = PTR_ERR(ring); + goto err_unpin_kernel; + } + } + ret = intel_engine_init_breadcrumbs(engine); if (ret) - goto err_unpin; + goto err_unpin_preempt; ret = i915_gem_render_state_init(engine); if (ret) @@ -634,7 +647,10 @@ err_rs_fini: i915_gem_render_state_fini(engine); err_breadcrumbs: intel_engine_fini_breadcrumbs(engine); -err_unpin: +err_unpin_preempt: + if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) + engine->context_unpin(engine, engine->i915->preempt_context); +err_unpin_kernel: engine->context_unpin(engine, engine->i915->kernel_context); return ret; } @@ -660,6 +676,8 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) intel_engine_cleanup_cmd_parser(engine); i915_gem_batch_pool_fini(&engine->batch_pool); + if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) + engine->context_unpin(engine, engine->i915->preempt_context); engine->context_unpin(engine, engine->i915->kernel_context); } From 3ad7b52d962e7c7da11128be4108f606460ae4d2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 3 Oct 2017 21:34:49 +0100 Subject: [PATCH 034/145] drm/i915/execlists: Move bdw GPGPU w/a to emit_bb Move the re-enabling of MI arbitration from a per-bb w/a buffer to the emission of the batch buffer itself. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171003203453.15692-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index a89bc1f148fb..1703bf6c98d6 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1158,24 +1158,6 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) return batch; } -/* - * This batch is started immediately after indirect_ctx batch. Since we ensure - * that indirect_ctx ends on a cacheline this batch is aligned automatically. - * - * The number of DWORDS written are returned using this field. - * - * This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding - * to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant. - */ -static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch) -{ - /* WaDisableCtxRestoreArbitration:bdw,chv */ - *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - *batch++ = MI_BATCH_BUFFER_END; - - return batch; -} - static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) { /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ @@ -1290,7 +1272,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) break; case 8: wa_bb_fn[0] = gen8_init_indirectctx_bb; - wa_bb_fn[1] = gen8_init_perctx_bb; + wa_bb_fn[1] = NULL; break; default: MISSING_CASE(INTEL_GEN(engine->i915)); @@ -1534,13 +1516,15 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, if (IS_ERR(cs)) return PTR_ERR(cs); + /* WaDisableCtxRestoreArbitration:bdw,chv */ + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + /* FIXME(BDW): Address space and security selectors. */ *cs++ = MI_BATCH_BUFFER_START_GEN8 | (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)) | (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); *cs++ = lower_32_bits(offset); *cs++ = upper_32_bits(offset); - *cs++ = MI_NOOP; intel_ring_advance(req, cs); return 0; From 1f181225f8ec4ca59b53239aa06475d7c5b936b3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 3 Oct 2017 21:34:50 +0100 Subject: [PATCH 035/145] drm/i915/execlists: Keep request->priority for its lifetime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With preemption, we will want to "unsubmit" a request, taking it back from the hw and returning it to the priority sorted execution list. In order to know where to insert it into that list, we need to remember its adjust priority (which may change even as it was being executed). This also affects reset for execlists as we are now unsubmitting the requests following the reset (rather than directly writing the ELSP for the inflight contexts). This turns reset into an accidental preemption point, as after the reset we may choose a different pair of contexts to submit to hw. GuC is not updated as this series doesn't add preemption to the GuC submission, and so it can keep benefiting from the early pruning of the DFS inside execlists_schedule() for a little longer. We also need to find a way of reducing the cost of that DFS... v2: Include priority in error-state Signed-off-by: Chris Wilson Cc: Michał Winiarski Reviewed-by: Michał Winiarski Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171003203453.15692-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gpu_error.c | 10 ++++++---- drivers/gpu/drm/i915/intel_lrc.c | 14 ++++++++++---- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 31292afb961d..1fc7080bfa7b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -982,6 +982,7 @@ struct i915_gpu_state { pid_t pid; u32 handle; u32 hw_id; + int priority; int ban_score; int active; int guilty; @@ -1004,6 +1005,7 @@ struct i915_gpu_state { long jiffies; pid_t pid; u32 context; + int priority; int ban_score; u32 seqno; u32 head; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index c14552ab270b..dc91b32d699e 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -377,9 +377,9 @@ static void error_print_request(struct drm_i915_error_state_buf *m, if (!erq->seqno) return; - err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, emitted %dms ago, head %08x, tail %08x\n", + err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms ago, head %08x, tail %08x\n", prefix, erq->pid, erq->ban_score, - erq->context, erq->seqno, + erq->context, erq->seqno, erq->priority, jiffies_to_msecs(jiffies - erq->jiffies), erq->head, erq->tail); } @@ -388,9 +388,9 @@ static void error_print_context(struct drm_i915_error_state_buf *m, const char *header, const struct drm_i915_error_context *ctx) { - err_printf(m, "%s%s[%d] user_handle %d hw_id %d, ban score %d guilty %d active %d\n", + err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d guilty %d active %d\n", header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, - ctx->ban_score, ctx->guilty, ctx->active); + ctx->priority, ctx->ban_score, ctx->guilty, ctx->active); } static void error_print_engine(struct drm_i915_error_state_buf *m, @@ -1271,6 +1271,7 @@ static void record_request(struct drm_i915_gem_request *request, struct drm_i915_error_request *erq) { erq->context = request->ctx->hw_id; + erq->priority = request->priotree.priority; erq->ban_score = atomic_read(&request->ctx->ban_score); erq->seqno = request->global_seqno; erq->jiffies = request->emitted_jiffies; @@ -1364,6 +1365,7 @@ static void record_context(struct drm_i915_error_context *e, e->handle = ctx->user_handle; e->hw_id = ctx->hw_id; + e->priority = ctx->priority; e->ban_score = atomic_read(&ctx->ban_score); e->guilty = atomic_read(&ctx->guilty_count); e->active = atomic_read(&ctx->active_count); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 1703bf6c98d6..5821762d9007 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -584,8 +584,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } INIT_LIST_HEAD(&rq->priotree.link); - rq->priotree.priority = INT_MAX; - __i915_gem_request_submit(rq); trace_i915_gem_request_in(rq, port_index(port, execlists)); last = rq; @@ -793,6 +791,7 @@ static void intel_lrc_irq_handler(unsigned long data) execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); trace_i915_gem_request_out(rq); + rq->priotree.priority = INT_MAX; i915_gem_request_put(rq); execlists_port_complete(execlists, port); @@ -845,11 +844,15 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) spin_unlock_irqrestore(&engine->timeline->lock, flags); } +static struct drm_i915_gem_request *pt_to_request(struct i915_priotree *pt) +{ + return container_of(pt, struct drm_i915_gem_request, priotree); +} + static struct intel_engine_cs * pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) { - struct intel_engine_cs *engine = - container_of(pt, struct drm_i915_gem_request, priotree)->engine; + struct intel_engine_cs *engine = pt_to_request(pt)->engine; GEM_BUG_ON(!locked); @@ -905,6 +908,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) * engines. */ list_for_each_entry(p, &pt->signalers_list, signal_link) { + if (i915_gem_request_completed(pt_to_request(p->signaler))) + continue; + GEM_BUG_ON(p->signaler->priority < pt->priority); if (prio > READ_ONCE(p->signaler->priority)) list_move_tail(&p->dfs_link, &dfs); From bf64e0b00e1ff30449cc68e7bc7498adb6faa343 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 3 Oct 2017 21:34:51 +0100 Subject: [PATCH 036/145] drm/i915: Expand I915_PARAM_HAS_SCHEDULER into a capability bitmask In the next few patches, we wish to enable different features for the scheduler, some which may subtlety change ABI (e.g. allow requests to be reordered under different circumstances). So we need to make sure userspace is cognizant of the changes (if they care), by which we employ the usual method of a GETPARAM. We already have an I915_PARAM_HAS_SCHEDULER (which notes the existing ability to reorder requests to avoid bubbles), and now we wish to extend that to be a bitmask to describe the different capabilities implemented. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171003203453.15692-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 5 +++-- include/uapi/drm/i915_drm.h | 9 ++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 74a456fe487f..006332805702 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -367,8 +367,9 @@ static int i915_getparam(struct drm_device *dev, void *data, value = i915_gem_mmap_gtt_version(); break; case I915_PARAM_HAS_SCHEDULER: - value = dev_priv->engine[RCS] && - dev_priv->engine[RCS]->schedule; + value = 0; + if (dev_priv->engine[RCS] && dev_priv->engine[RCS]->schedule) + value |= I915_SCHEDULER_CAP_ENABLED; break; case I915_PARAM_MMAP_VERSION: /* Remember to bump this if the version changes! */ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index fe25a01c81f2..aa4a3b20ef6b 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -397,10 +397,17 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_MIN_EU_IN_POOL 39 #define I915_PARAM_MMAP_GTT_VERSION 40 -/* Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution +/* + * Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution * priorities and the driver will attempt to execute batches in priority order. + * The param returns a capability bitmask, nonzero implies that the scheduler + * is enabled, with different features present according to the mask. */ #define I915_PARAM_HAS_SCHEDULER 41 +#define I915_SCHEDULER_CAP_ENABLED (1ul << 0) +#define I915_SCHEDULER_CAP_PRIORITY (1ul << 1) +#define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) + #define I915_PARAM_HUC_STATUS 42 /* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of From beecec9017901849352cfd2886981fe462f9fed0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 3 Oct 2017 21:34:52 +0100 Subject: [PATCH 037/145] drm/i915/execlists: Preemption! When we write to ELSP, it triggers a context preemption at the earliest arbitration point (3DPRIMITIVE, some PIPECONTROLs, a few other operations and the explicit MI_ARB_CHECK). If this is to the same context, it triggers a LITE_RESTORE where the RING_TAIL is merely updated (used currently to chain requests from the same context together, avoiding bubbles). However, if it is to a different context, a full context-switch is performed and it will start to execute the new context saving the image of the old for later execution. Previously we avoided preemption by only submitting a new context when the old was idle. But now we wish embrace it, and if the new request has a higher priority than the currently executing request, we write to the ELSP regardless, thus triggering preemption, but we tell the GPU to switch to our special preemption context (not the target). In the context-switch interrupt handler, we know that the previous contexts have finished execution and so can unwind all the incomplete requests and compute the new highest priority request to execute. It would be feasible to avoid the switch-to-idle intermediate by programming the ELSP with the target context. The difficulty is in tracking which request that should be whilst maintaining the dependency change, the error comes in with coalesced requests. As we only track the most recent request and its priority, we may run into the issue of being tricked in preempting a high priority request that was followed by a low priority request from the same context (e.g. for PI); worse still that earlier request may be our own dependency and the order then broken by preemption. By injecting the switch-to-idle and then recomputing the priority queue, we avoid the issue with tracking in-flight coalesced requests. Having tried the preempt-to-busy approach, and failed to find a way around the coalesced priority issue, Michal's original proposal to inject an idle context (based on handling GuC preemption) succeeds. The current heuristic for deciding when to preempt are only if the new request is of higher priority, and has the privileged priority of greater than 0. Note that the scheduler remains unfair! v2: Disable for gen8 (bdw/bsw) as we need additional w/a for GPGPU. Since, the feature is now conditional and not always available when we have a scheduler, make it known via the HAS_SCHEDULER GETPARAM (now a capability mask). v3: Stylistic tweaks. v4: Appease Joonas with a snippet of kerneldoc, only to fuel to fire of the preempt vs preempting debate. Suggested-by: Michal Winiarski Signed-off-by: Chris Wilson Cc: Michal Winiarski Cc: Tvrtko Ursulin Cc: Arkadiusz Hiler Cc: Mika Kuoppala Cc: Ben Widawsky Cc: Zhenyu Wang Cc: Zhi Wang Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171003203453.15692-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 9 +- drivers/gpu/drm/i915/i915_irq.c | 6 +- drivers/gpu/drm/i915/i915_pci.c | 2 + drivers/gpu/drm/i915/intel_lrc.c | 157 +++++++++++++++++++----- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 + 5 files changed, 143 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 006332805702..7614880edad8 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -368,9 +368,16 @@ static int i915_getparam(struct drm_device *dev, void *data, break; case I915_PARAM_HAS_SCHEDULER: value = 0; - if (dev_priv->engine[RCS] && dev_priv->engine[RCS]->schedule) + if (dev_priv->engine[RCS] && dev_priv->engine[RCS]->schedule) { value |= I915_SCHEDULER_CAP_ENABLED; + + if (INTEL_INFO(dev_priv)->has_logical_ring_preemption && + i915_modparams.enable_execlists && + !i915_modparams.enable_guc_submission) + value |= I915_SCHEDULER_CAP_PREEMPTION; + } break; + case I915_PARAM_MMAP_VERSION: /* Remember to bump this if the version changes! */ case I915_PARAM_HAS_GEM: diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index e5997e818673..de777139f6a1 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1382,10 +1382,8 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) bool tasklet = false; if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { - if (port_count(&execlists->port[0])) { - __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - tasklet = true; - } + __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + tasklet = true; } if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) { diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index d3a6fe5e3c10..745b6a6e0188 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -424,6 +424,7 @@ static const struct intel_device_info intel_cherryview_info __initconst = { #define GEN9_FEATURES \ GEN8_FEATURES, \ + .has_logical_ring_preemption = 1, \ .has_csr = 1, \ .has_guc = 1, \ .has_ipc = 1, \ @@ -477,6 +478,7 @@ static const struct intel_device_info intel_skylake_gt4_info __initconst = { .has_rc6 = 1, \ .has_dp_mst = 1, \ .has_logical_ring_contexts = 1, \ + .has_logical_ring_preemption = 1, \ .has_guc = 1, \ .has_aliasing_ppgtt = 1, \ .has_full_ppgtt = 1, \ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5821762d9007..c5b76082d695 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -208,9 +208,9 @@ /* Typical size of the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ - #define WA_TAIL_DWORDS 2 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS) +#define PREEMPT_ID 0x1 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); @@ -429,6 +429,12 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) return ce->lrc_desc; } +static inline void elsp_write(u64 desc, u32 __iomem *elsp) +{ + writel(upper_32_bits(desc), elsp); + writel(lower_32_bits(desc), elsp); +} + static void execlists_submit_ports(struct intel_engine_cs *engine) { struct execlist_port *port = engine->execlists.port; @@ -454,8 +460,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = 0; } - writel(upper_32_bits(desc), elsp); - writel(lower_32_bits(desc), elsp); + elsp_write(desc, elsp); } } @@ -488,26 +493,43 @@ static void port_assign(struct execlist_port *port, port_set(port, port_pack(i915_gem_request_get(rq), port_count(port))); } +static void inject_preempt_context(struct intel_engine_cs *engine) +{ + struct intel_context *ce = + &engine->i915->preempt_context->engine[engine->id]; + u32 __iomem *elsp = + engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + unsigned int n; + + GEM_BUG_ON(engine->i915->preempt_context->hw_id != PREEMPT_ID); + GEM_BUG_ON(!IS_ALIGNED(ce->ring->size, WA_TAIL_BYTES)); + + memset(ce->ring->vaddr + ce->ring->tail, 0, WA_TAIL_BYTES); + ce->ring->tail += WA_TAIL_BYTES; + ce->ring->tail &= (ce->ring->size - 1); + ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail; + + for (n = execlists_num_ports(&engine->execlists); --n; ) + elsp_write(0, elsp); + + elsp_write(ce->lrc_desc, elsp); +} + +static bool can_preempt(struct intel_engine_cs *engine) +{ + return INTEL_INFO(engine->i915)->has_logical_ring_preemption; +} + static void execlists_dequeue(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *last; struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; const struct execlist_port * const last_port = &execlists->port[execlists->port_mask]; + struct drm_i915_gem_request *last = port_request(port); struct rb_node *rb; bool submit = false; - last = port_request(port); - if (last) - /* WaIdleLiteRestore:bdw,skl - * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL - * as we resubmit the request. See gen8_emit_breadcrumb() - * for where we prepare the padding after the end of the - * request. - */ - last->tail = last->wa_tail; - /* Hardware submission is through 2 ports. Conceptually each port * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is * static for a context, and unique to each, so we only execute @@ -532,7 +554,65 @@ static void execlists_dequeue(struct intel_engine_cs *engine) spin_lock_irq(&engine->timeline->lock); rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); - while (rb) { + if (!rb) + goto unlock; + + if (last) { + /* + * Don't resubmit or switch until all outstanding + * preemptions (lite-restore) are seen. Then we + * know the next preemption status we see corresponds + * to this ELSP update. + */ + if (port_count(&port[0]) > 1) + goto unlock; + + if (can_preempt(engine) && + rb_entry(rb, struct i915_priolist, node)->priority > + max(last->priotree.priority, 0)) { + /* + * Switch to our empty preempt context so + * the state of the GPU is known (idle). + */ + inject_preempt_context(engine); + execlists->preempt = true; + goto unlock; + } else { + /* + * In theory, we could coalesce more requests onto + * the second port (the first port is active, with + * no preemptions pending). However, that means we + * then have to deal with the possible lite-restore + * of the second port (as we submit the ELSP, there + * may be a context-switch) but also we may complete + * the resubmission before the context-switch. Ergo, + * coalescing onto the second port will cause a + * preemption event, but we cannot predict whether + * that will affect port[0] or port[1]. + * + * If the second port is already active, we can wait + * until the next context-switch before contemplating + * new requests. The GPU will be busy and we should be + * able to resubmit the new ELSP before it idles, + * avoiding pipeline bubbles (momentary pauses where + * the driver is unable to keep up the supply of new + * work). + */ + if (port_count(&port[1])) + goto unlock; + + /* WaIdleLiteRestore:bdw,skl + * Apply the wa NOOPs to prevent + * ring:HEAD == req:TAIL as we resubmit the + * request. See gen8_emit_breadcrumb() for + * where we prepare the padding after the + * end of the request. + */ + last->tail = last->wa_tail; + } + } + + do { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); struct drm_i915_gem_request *rq, *rn; @@ -595,11 +675,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine) INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); - } + } while (rb); done: execlists->first = rb; if (submit) port_assign(port, last); +unlock: spin_unlock_irq(&engine->timeline->lock); if (submit) @@ -680,13 +761,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) spin_unlock_irqrestore(&engine->timeline->lock, flags); } -static bool execlists_elsp_ready(const struct intel_engine_cs *engine) -{ - const struct execlist_port *port = engine->execlists.port; - - return port_count(&port[0]) + port_count(&port[1]) < 2; -} - /* * Check the unread Context Status Buffers and manage the submission of new * contexts to the ELSP accordingly. @@ -695,7 +769,7 @@ static void intel_lrc_irq_handler(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; struct intel_engine_execlists * const execlists = &engine->execlists; - struct execlist_port *port = execlists->port; + struct execlist_port * const port = execlists->port; struct drm_i915_private *dev_priv = engine->i915; /* We can skip acquiring intel_runtime_pm_get() here as it was taken @@ -780,6 +854,23 @@ static void intel_lrc_irq_handler(unsigned long data) if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) continue; + if (status & GEN8_CTX_STATUS_ACTIVE_IDLE && + buf[2*head + 1] == PREEMPT_ID) { + execlist_cancel_port_requests(execlists); + + spin_lock_irq(&engine->timeline->lock); + unwind_incomplete_requests(engine); + spin_unlock_irq(&engine->timeline->lock); + + GEM_BUG_ON(!execlists->preempt); + execlists->preempt = false; + continue; + } + + if (status & GEN8_CTX_STATUS_PREEMPTED && + execlists->preempt) + continue; + /* Check the context/desc id for this event matches */ GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); @@ -811,7 +902,7 @@ static void intel_lrc_irq_handler(unsigned long data) } } - if (execlists_elsp_ready(engine)) + if (!execlists->preempt) execlists_dequeue(engine); intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); @@ -824,7 +915,7 @@ static void insert_request(struct intel_engine_cs *engine, struct i915_priolist *p = lookup_priolist(engine, pt, prio); list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests); - if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(engine)) + if (ptr_unmask_bits(p, 1)) tasklet_hi_schedule(&engine->execlists.irq_tasklet); } @@ -954,8 +1045,6 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) } spin_unlock_irq(&engine->timeline->lock); - - /* XXX Do we need to preempt to make room for us and our deps? */ } static struct intel_ring * @@ -1151,6 +1240,8 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES); + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + /* Pad to end of cacheline */ while ((unsigned long)batch % CACHELINE_BYTES) *batch++ = MI_NOOP; @@ -1166,6 +1257,8 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) { + *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ batch = gen8_emit_flush_coherentl3_wa(engine, batch); @@ -1211,6 +1304,8 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) *batch++ = 0; } + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + /* Pad to end of cacheline */ while ((unsigned long)batch % CACHELINE_BYTES) *batch++ = MI_NOOP; @@ -1364,6 +1459,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); execlists->csb_head = -1; + execlists->preempt = false; /* After a GPU reset, we may have requests to replay */ if (!i915_modparams.enable_guc_submission && execlists->first) @@ -1659,7 +1755,8 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, */ static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs) { - *cs++ = MI_NOOP; + /* Ensure there's always at least one preemption point per-request. */ + *cs++ = MI_ARB_CHECK; *cs++ = MI_NOOP; request->wa_tail = intel_ring_offset(request, cs); } @@ -1680,7 +1777,6 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) gen8_emit_wa_tail(request, cs); } - static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, @@ -1708,7 +1804,6 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, gen8_emit_wa_tail(request, cs); } - static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS; static int gen8_init_rcs_context(struct drm_i915_gem_request *req) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 56d7ae9f298b..0fedda17488c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -238,6 +238,11 @@ struct intel_engine_execlists { #define EXECLIST_MAX_PORTS 2 } port[EXECLIST_MAX_PORTS]; + /** + * @preempt: are we currently handling a preempting context switch? + */ + bool preempt; + /** * @port_mask: number of execlist ports - 1 */ From ac14fbd460d0ec16e7750e40dcd8199b0ff83d0a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 3 Oct 2017 21:34:53 +0100 Subject: [PATCH 038/145] drm/i915/scheduler: Support user-defined priorities Use a priority stored in the context as the initial value when submitting a request. This allows us to change the default priority on a per-context basis, allowing different contexts to be favoured with GPU time at the expense of lower importance work. The user can adjust the context's priority via I915_CONTEXT_PARAM_PRIORITY, with more positive values being higher priority (they will be serviced earlier, after their dependencies have been resolved). Any prerequisite work for an execbuf will have its priority raised to match the new request as required. Normal users can specify any value in the range of -1023 to 0 [default], i.e. they can reduce the priority of their workloads (and temporarily boost it back to normal if so desired). Privileged users can specify any value in the range of -1023 to 1023, [default is 0], i.e. they can raise their priority above all overs and so potentially starve the system. Note that the existing schedulers are not fair, nor load balancing, the execution is strictly by priority on a first-come, first-served basis, and the driver may choose to boost some requests above the range available to users. This priority was originally based around nice(2), but evolved to allow clients to adjust their priority within a small range, and allow for a privileged high priority range. For example, this can be used to implement EGL_IMG_context_priority https://www.khronos.org/registry/egl/extensions/IMG/EGL_IMG_context_priority.txt EGL_CONTEXT_PRIORITY_LEVEL_IMG determines the priority level of the context to be created. This attribute is a hint, as an implementation may not support multiple contexts at some priority levels and system policy may limit access to high priority contexts to appropriate system privilege level. The default value for EGL_CONTEXT_PRIORITY_LEVEL_IMG is EGL_CONTEXT_PRIORITY_MEDIUM_IMG." so we can map PRIORITY_HIGH -> 1023 [privileged, will failback to 0] PRIORITY_MED -> 0 [default] PRIORITY_LOW -> -1023 They also map onto the priorities used by VkQueue (and a VkQueue is essentially a timeline, our i915_gem_context under full-ppgtt). v2: s/CAP_SYS_ADMIN/CAP_SYS_NICE/ v3: Report min/max user priorities as defines in the uapi, and rebase internal priorities on the exposed values. Testcase: igt/gem_exec_schedule Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171003203453.15692-9-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_gem_context.c | 23 +++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_request.h | 14 ++++++++++---- include/uapi/drm/i915_drm.h | 7 +++++++ 4 files changed, 41 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 7614880edad8..66fc156b294a 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -370,6 +370,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = 0; if (dev_priv->engine[RCS] && dev_priv->engine[RCS]->schedule) { value |= I915_SCHEDULER_CAP_ENABLED; + value |= I915_SCHEDULER_CAP_PRIORITY; if (INTEL_INFO(dev_priv)->has_logical_ring_preemption && i915_modparams.enable_execlists && diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 2bb8e58706ba..5bf96a258509 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -1070,6 +1070,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, case I915_CONTEXT_PARAM_BANNABLE: args->value = i915_gem_context_is_bannable(ctx); break; + case I915_CONTEXT_PARAM_PRIORITY: + args->value = ctx->priority; + break; default: ret = -EINVAL; break; @@ -1125,6 +1128,26 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, else i915_gem_context_clear_bannable(ctx); break; + + case I915_CONTEXT_PARAM_PRIORITY: + { + int priority = args->value; + + if (args->size) + ret = -EINVAL; + else if (!to_i915(dev)->engine[RCS]->schedule) + ret = -ENODEV; + else if (priority > I915_CONTEXT_MAX_USER_PRIORITY || + priority < I915_CONTEXT_MIN_USER_PRIORITY) + ret = -EINVAL; + else if (priority > I915_CONTEXT_DEFAULT_PRIORITY && + !capable(CAP_SYS_NICE)) + ret = -EPERM; + else + ctx->priority = priority; + } + break; + default: ret = -EINVAL; break; diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 6b9e992d01de..26249f39de67 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -30,6 +30,8 @@ #include "i915_gem.h" #include "i915_sw_fence.h" +#include + struct drm_file; struct drm_i915_gem_object; struct drm_i915_gem_request; @@ -69,10 +71,14 @@ struct i915_priotree { struct list_head waiters_list; /* those after us, they depend upon us */ struct list_head link; int priority; -#define I915_PRIORITY_MAX 1024 -#define I915_PRIORITY_NORMAL 0 -#define I915_PRIORITY_MIN (-I915_PRIORITY_MAX) -#define I915_PRIORITY_INVALID INT_MIN +}; + +enum { + I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, + I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, + I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, + + I915_PRIORITY_INVALID = INT_MIN }; struct i915_gem_capture_list { diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index aa4a3b20ef6b..7266b53191ee 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -402,6 +402,9 @@ typedef struct drm_i915_irq_wait { * priorities and the driver will attempt to execute batches in priority order. * The param returns a capability bitmask, nonzero implies that the scheduler * is enabled, with different features present according to the mask. + * + * The initial priority for each batch is supplied by the context and is + * controlled via I915_CONTEXT_PARAM_PRIORITY. */ #define I915_PARAM_HAS_SCHEDULER 41 #define I915_SCHEDULER_CAP_ENABLED (1ul << 0) @@ -1367,6 +1370,10 @@ struct drm_i915_gem_context_param { #define I915_CONTEXT_PARAM_GTT_SIZE 0x3 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 #define I915_CONTEXT_PARAM_BANNABLE 0x5 +#define I915_CONTEXT_PARAM_PRIORITY 0x6 +#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */ +#define I915_CONTEXT_DEFAULT_PRIORITY 0 +#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */ __u64 value; }; From 5ea2355a100a3c6304901d058aee06d3a6be69bc Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Tue, 3 Oct 2017 17:22:11 +0300 Subject: [PATCH 039/145] drm/i915/mst: Use MST sideband message transactions for dpms control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the POWER_DOWN_PHY and POWER_UP_PHY sideband message transactions to set power states for downstream sinks. Apart from giving us the ability to set power state for individual sinks, this fixes the below test for me. $ xrandr --display :0 --output DP-2-2-8 --off $ xrandr --display :0 --output DP-2-2-1 --off $ xrandr --display :0 --output DP-2-2-8 --auto #Black screen $ xrandr --display :0 --output DP-2-2-1 --auto v2: Modify and document the dpms and port disable order (Ville) Add comment explaining is_mst = !crtc_state equivalence(Ville, Maarten) v3 by Jani: rebase References: https://bugs.freedesktop.org/show_bug.cgi?id=90963 References: https://bugs.freedesktop.org/show_bug.cgi?id=88124 Cc: Ville Syrjälä Cc: Lyude Cc: Maarten Lankhorst Reviewed-by: Lyude Paul Acked-by: Maarten Lankhorst Signed-off-by: Dhinakaran Pandiyan Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20171003142211.860-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 18 ++++++++++++++---- drivers/gpu/drm/i915/intel_dp_mst.c | 10 ++++++++-- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 65f4b6786791..511aa60e0176 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2162,7 +2162,8 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_prepare_dp_ddi_buffers(encoder); intel_ddi_init_dp_buf_reg(encoder); - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); + if (!link_mst) + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); intel_dp_start_link_train(intel_dp); if (port != PORT_A || INTEL_GEN(dev_priv) >= 9) intel_dp_stop_link_train(intel_dp); @@ -2236,12 +2237,21 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, uint32_t val; bool wait = false; - /* old_crtc_state and old_conn_state are NULL when called from DP_MST */ - if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { + /* + * old_crtc_state and old_conn_state are NULL when called from + * DP_MST. The main connector associated with this port is never + * bound to a crtc for MST. + */ + bool is_mst = !old_crtc_state; struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); + /* + * Power down sink before disabling the port, otherwise we end + * up getting interrupts from the sink on detecting link loss. + */ + if (!is_mst) + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); } val = I915_READ(DDI_BUF_CTL(port)); diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 9a396f483f8b..3c131e2544cf 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -162,14 +162,19 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder, drm_dp_mst_deallocate_vcpi(&intel_dp->mst_mgr, connector->port); + /* + * Power down mst path before disabling the port, otherwise we end + * up getting interrupts from the sink upon detecting link loss. + */ + drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, + false); + intel_dp->active_mst_links--; intel_mst->connector = NULL; if (intel_dp->active_mst_links == 0) { intel_dig_port->base.post_disable(&intel_dig_port->base, NULL, NULL); - - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); } DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); } @@ -196,6 +201,7 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); + drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, true); if (intel_dp->active_mst_links == 0) intel_dig_port->base.pre_enable(&intel_dig_port->base, pipe_config, NULL); From cbacf02e7796fea02e5c6e46c90ed7cbe9e6f2c0 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Wed, 4 Oct 2017 09:48:26 -0700 Subject: [PATCH 040/145] drm/i915/edp: Get the Panel Power Off timestamp after panel is off Kernel stores the time in jiffies at which the eDP panel is turned off. This should be obtained after the panel is off (after the wait_panel_off). When we next attempt to turn the panel on, we use the difference between the timestamp at which we want to turn the panel on and timestamp at which panel was turned off to ensure that this is equal to panel power cycle delay and if not we wait for the remaining time. Not waiting for the panel power cycle delay can cause the panel to not turn on giving rise to AUX timeouts for the attempted AUX transactions. v2: * Separate lines for bugzilla (Jani Nikula) * Suggested by tag (Daniel Vetter) Cc: Daniel Vetter Cc: Jani Nikula Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101518 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101144 Suggested-by: Daniel Vetter Signed-off-by: Manasi Navare Reviewed-by: Daniel Vetter Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1507135706-17147-1-git-send-email-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 90e756c76f10..0fd41cdc68f1 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2308,8 +2308,8 @@ static void edp_panel_off(struct intel_dp *intel_dp) I915_WRITE(pp_ctrl_reg, pp); POSTING_READ(pp_ctrl_reg); - intel_dp->panel_power_off_time = ktime_get_boottime(); wait_panel_off(intel_dp); + intel_dp->panel_power_off_time = ktime_get_boottime(); /* We got a reference when we enabled the VDD. */ intel_display_power_put(dev_priv, intel_dp->aux_power_domain); From c02b8fb4073d1b9aa5af909a91b51056b819d946 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Tue, 3 Oct 2017 16:37:25 -0700 Subject: [PATCH 041/145] drm/i915/edp: Increase the T12 delay quirk to 1300ms For this specific PCI device, the eDP panel requires a higher panel power cycle delay of 1300ms where the minimum spec requirement of panel power cycle delay is 500ms. This fix in combination with correct timestamp at which we get the panel power off time fixes the dP AUX CH timeouts seen on various IGT tests. Fixes: c99a259b4b5192ba ("drm/i915/edp: Add a T12 panel delay quirk to fix DP AUX CH timeouts") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101144 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101518 Cc: Daniel Vetter Cc: Jani Nikula Cc: Ville Syrjala Signed-off-by: Manasi Navare Acked-by: Daniel Vetter Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1507073845-13420-2-git-send-email-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 0fd41cdc68f1..ca48bce23a6f 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -5286,7 +5286,7 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev, * seems sufficient to avoid this problem. */ if (dev_priv->quirks & QUIRK_INCREASE_T12_DELAY) { - vbt.t11_t12 = max_t(u16, vbt.t11_t12, 900 * 10); + vbt.t11_t12 = max_t(u16, vbt.t11_t12, 1300 * 10); DRM_DEBUG_KMS("Increasing T12 panel delay as per the quirk to %d\n", vbt.t11_t12); } From e91ef99b95439fc634e9c9753887d363a848f452 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 5 Oct 2017 11:59:27 +0100 Subject: [PATCH 042/145] drm/i915/selftests: Remember to create the fake preempt context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For the fake device we have our own set of mock contexts that need to match the real contexts we normally create. Currently this requires us to manually instantiate them for the selftests, which I forgot. Reported-by: Matthew Auld Fixes: e7af3116836f ("drm/i915: Introduce a preempt context") Signed-off-by: Chris Wilson Cc: Michał Winiarski Cc: Joonas Lahtinen Cc: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20171005105927.22991-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 2388424a14da..d01b7ec2d68d 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -239,8 +239,14 @@ struct drm_i915_private *mock_gem_device(void) if (!i915->kernel_context) goto err_engine; + i915->preempt_context = mock_context(i915, NULL); + if (!i915->preempt_context) + goto err_kernel_context; + return i915; +err_kernel_context: + i915_gem_context_put(i915->kernel_context); err_engine: for_each_engine(engine, i915, id) mock_engine_free(engine); From 764d2997ec0edbb48f12e6477ccaceecd999091d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 5 Oct 2017 14:07:22 +0200 Subject: [PATCH 043/145] drm/i915/selftests: fix check for intel IOMMU An earlier bugfix tried to work around this build failure: drivers/gpu/drm/i915/selftests/mock_gem_device.c: In function 'mock_gem_device': drivers/gpu/drm/i915/selftests/mock_gem_device.c:151:20: error: 'struct dev_archdata' has no member named 'iommu' Checking for CONFIG_IOMMU_API is not sufficient as a compile-time test since that may be enabled in configurations that have neither INTEL_IOMMU not AMD_IOMMU enabled. This changes the check to INTEL_IOMMU instead, as this is the only case we actually care about. Fixes: f46f156ea770 ("drm/i915/selftests: Only touch archdata.iommu when it exists") Signed-off-by: Arnd Bergmann Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20171005120749.400818-1-arnd@arndb.de --- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index d01b7ec2d68d..0c8225d677e9 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -146,7 +146,7 @@ struct drm_i915_private *mock_gem_device(void) dev_set_name(&pdev->dev, "mock"); dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); -#if IS_ENABLED(CONFIG_IOMMU_API) +#if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU) /* hack to disable iommu for the fake device; force identity mapping */ pdev->dev.archdata.iommu = (void *)-1; #endif From 83482ca3b4fe7312bf3d1f25c7a4e9ff66585eab Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 5 Oct 2017 14:08:26 +0200 Subject: [PATCH 044/145] drm/i915: avoid potential uninitialized variable use One of the recent changes introduced a warning about undefined behavior in the sanity checking: drivers/gpu/drm/i915/intel_ddi.c: In function 'intel_ddi_hdmi_level': drivers/gpu/drm/i915/intel_ddi.c:654:6: error: 'n_hdmi_entries' may be used uninitialized in this function [-Werror=maybe-uninitialized] It seems that the new cnl specific get_buf_trans functions can return uninitialized data if the voltage level is set to an unexpected value. This changes the code to always return '1' in that error case, which seems like the safest choice as we use one less than the number as an array index later on. Fixes: cc9cabfdec38 ("drm/i915/cnl: Move voltage check into ddi buf trans functions.") Signed-off-by: Arnd Bergmann [danvet: shut up gcc comment added.] Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20171005120835.437022-1-arnd@arndb.de --- drivers/gpu/drm/i915/intel_ddi.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 511aa60e0176..f7c91bb9d13e 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -602,8 +602,10 @@ cnl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries) } else if (voltage == VOLTAGE_INFO_1_05V) { *n_entries = ARRAY_SIZE(cnl_ddi_translations_hdmi_1_05V); return cnl_ddi_translations_hdmi_1_05V; - } else + } else { + *n_entries = 1; /* shut up gcc */ MISSING_CASE(voltage); + } return NULL; } @@ -621,8 +623,10 @@ cnl_get_buf_trans_dp(struct drm_i915_private *dev_priv, int *n_entries) } else if (voltage == VOLTAGE_INFO_1_05V) { *n_entries = ARRAY_SIZE(cnl_ddi_translations_dp_1_05V); return cnl_ddi_translations_dp_1_05V; - } else + } else { + *n_entries = 1; /* shut up gcc */ MISSING_CASE(voltage); + } return NULL; } @@ -641,8 +645,10 @@ cnl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) } else if (voltage == VOLTAGE_INFO_1_05V) { *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_1_05V); return cnl_ddi_translations_edp_1_05V; - } else + } else { + *n_entries = 1; /* shut up gcc */ MISSING_CASE(voltage); + } return NULL; } else { return cnl_get_buf_trans_dp(dev_priv, n_entries); From 3cf1934abe88103fcbeff88bbaee8a094502e6cb Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Wed, 4 Oct 2017 08:39:52 -0700 Subject: [PATCH 045/145] drm/i915/cnl: Do not add an extra page for precaution in the Gen10 LRC size BSpec indicates exactly 16752 DWORDs (17 pages), plus one page for PPHWSP. Please notice that, when looking at the BSpec context image table, the right filter has to be applied (e.g. "CNL") as some rows are excluded for specific GENs. BSpec: 1383 v2: Update count and add BSpec tag (Joonas) v3: Warning about filters in the commit message (Joonas) Suggested-by: Joonas Lahtinen Fixes: 7fd0b1a ("drm/i915/cnl: Add Gen10 LRC size") Signed-off-by: Oscar Mateo Cc: Rodrigo Vivi Cc: Daniele Ceraolo Spurio Cc: Ben Widawsky Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/1507131592-29209-1-git-send-email-oscar.mateo@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_engine_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 8b8053d8e24b..807a7aafc089 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -39,7 +39,7 @@ #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) -#define GEN10_LR_CONTEXT_RENDER_SIZE (19 * PAGE_SIZE) +#define GEN10_LR_CONTEXT_RENDER_SIZE (18 * PAGE_SIZE) #define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) From d62e2bf3897023c61db6b47138f615b5ba425563 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 4 Oct 2017 18:13:39 +0000 Subject: [PATCH 046/145] drm/i915/guc: Move GuC log declarations into dedicated header Move GuC log declarations into dedicated header as we want to keep component specific code in separate files. v2: fix includes (Chris) update commit message (Joonas) Suggested-by: Joonas Lahtinen Signed-off-by: Michal Wajdeczko Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Sagar Arun Kamble Reviewed-by: Chris Wilson Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171004181343.66348-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/intel_guc_log.c | 3 ++ drivers/gpu/drm/i915/intel_guc_log.h | 59 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_uc.h | 26 +----------- 3 files changed, 63 insertions(+), 25 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_guc_log.h diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index 6571d96704ad..b7317a13354a 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -21,8 +21,11 @@ * IN THE SOFTWARE. * */ + #include #include + +#include "intel_guc_log.h" #include "i915_drv.h" static void guc_log_capture_logs(struct intel_guc *guc); diff --git a/drivers/gpu/drm/i915/intel_guc_log.h b/drivers/gpu/drm/i915/intel_guc_log.h new file mode 100644 index 000000000000..b5ec374d9b15 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc_log.h @@ -0,0 +1,59 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_GUC_LOG_H_ +#define _INTEL_GUC_LOG_H_ + +#include + +#include "intel_guc_fwif.h" + +struct drm_i915_private; +struct intel_guc; + +struct intel_guc_log { + uint32_t flags; + struct i915_vma *vma; + /* The runtime stuff gets created only when GuC logging gets enabled */ + struct { + void *buf_addr; + struct workqueue_struct *flush_wq; + struct work_struct flush_work; + struct rchan *relay_chan; + } runtime; + /* logging related stats */ + u32 capture_miss_count; + u32 flush_interrupt_count; + u32 prev_overflow_count[GUC_MAX_LOG_BUFFER]; + u32 total_overflow_count[GUC_MAX_LOG_BUFFER]; + u32 flush_count[GUC_MAX_LOG_BUFFER]; +}; + +int intel_guc_log_create(struct intel_guc *guc); +void intel_guc_log_destroy(struct intel_guc *guc); +int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val); +void i915_guc_log_register(struct drm_i915_private *dev_priv); +void i915_guc_log_unregister(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 4fa091e90b5f..86ae5070185e 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -29,6 +29,7 @@ #include "i915_guc_reg.h" #include "intel_ringbuffer.h" #include "intel_guc_ct.h" +#include "intel_guc_log.h" #include "i915_vma.h" #include "intel_huc.h" @@ -72,24 +73,6 @@ struct i915_guc_client { uint64_t submissions[I915_NUM_ENGINES]; }; -struct intel_guc_log { - uint32_t flags; - struct i915_vma *vma; - /* The runtime stuff gets created only when GuC logging gets enabled */ - struct { - void *buf_addr; - struct workqueue_struct *flush_wq; - struct work_struct flush_work; - struct rchan *relay_chan; - } runtime; - /* logging related stats */ - u32 capture_miss_count; - u32 flush_interrupt_count; - u32 prev_overflow_count[GUC_MAX_LOG_BUFFER]; - u32 total_overflow_count[GUC_MAX_LOG_BUFFER]; - u32 flush_count[GUC_MAX_LOG_BUFFER]; -}; - struct intel_guc { struct intel_uc_fw fw; struct intel_guc_log log; @@ -165,13 +148,6 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv); void i915_guc_submission_fini(struct drm_i915_private *dev_priv); struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); -/* intel_guc_log.c */ -int intel_guc_log_create(struct intel_guc *guc); -void intel_guc_log_destroy(struct intel_guc *guc); -int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val); -void i915_guc_log_register(struct drm_i915_private *dev_priv); -void i915_guc_log_unregister(struct drm_i915_private *dev_priv); - static inline u32 guc_ggtt_offset(struct i915_vma *vma) { u32 offset = i915_ggtt_offset(vma); From 9f436c46ea09347e0ee050e819f486bbab047e6c Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 4 Oct 2017 18:13:40 +0000 Subject: [PATCH 047/145] drm/i915/guc: Move GuC submission declarations into dedicated header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move GuC submission declarations into dedicated header as we want to keep uC specific code in separate files. v2: fix include (Chris) update commit message (Joonas) Signed-off-by: Michal Wajdeczko Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Sagar Arun Kamble Cc: MichaĹ Winiarski Reviewed-by: Chris Wilson Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171004181343.66348-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 1 + drivers/gpu/drm/i915/i915_guc_submission.c | 7 +- drivers/gpu/drm/i915/i915_guc_submission.h | 80 ++++++++++++++++++++++ drivers/gpu/drm/i915/intel_uc.c | 1 + drivers/gpu/drm/i915/intel_uc.h | 45 ------------ 5 files changed, 86 insertions(+), 48 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_guc_submission.h diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b4a6ac60e7c6..44aae25d12c7 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -30,6 +30,7 @@ #include #include #include "intel_drv.h" +#include "i915_guc_submission.h" static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) { diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 04f1281d81a5..97dfe962547e 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -21,12 +21,13 @@ * IN THE SOFTWARE. * */ -#include -#include "i915_drv.h" -#include "intel_uc.h" +#include #include +#include "i915_guc_submission.h" +#include "i915_drv.h" + /** * DOC: GuC-based command submission * diff --git a/drivers/gpu/drm/i915/i915_guc_submission.h b/drivers/gpu/drm/i915/i915_guc_submission.h new file mode 100644 index 000000000000..a7e61e6246ff --- /dev/null +++ b/drivers/gpu/drm/i915/i915_guc_submission.h @@ -0,0 +1,80 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _I915_GUC_SUBMISSION_H_ +#define _I915_GUC_SUBMISSION_H_ + +#include + +#include "i915_gem.h" + +struct drm_i915_private; + +/* + * This structure primarily describes the GEM object shared with the GuC. + * The specs sometimes refer to this object as a "GuC context", but we use + * the term "client" to avoid confusion with hardware contexts. This + * GEM object is held for the entire lifetime of our interaction with + * the GuC, being allocated before the GuC is loaded with its firmware. + * Because there's no way to update the address used by the GuC after + * initialisation, the shared object must stay pinned into the GGTT as + * long as the GuC is in use. We also keep the first page (only) mapped + * into kernel address space, as it includes shared data that must be + * updated on every request submission. + * + * The single GEM object described here is actually made up of several + * separate areas, as far as the GuC is concerned. The first page (kept + * kmap'd) includes the "process descriptor" which holds sequence data for + * the doorbell, and one cacheline which actually *is* the doorbell; a + * write to this will "ring the doorbell" (i.e. send an interrupt to the + * GuC). The subsequent pages of the client object constitute the work + * queue (a circular array of work items), again described in the process + * descriptor. Work queue pages are mapped momentarily as required. + */ +struct i915_guc_client { + struct i915_vma *vma; + void *vaddr; + struct i915_gem_context *owner; + struct intel_guc *guc; + + /* bitmap of (host) engine ids */ + uint32_t engines; + uint32_t priority; + u32 stage_id; + uint32_t proc_desc_offset; + + u16 doorbell_id; + unsigned long doorbell_offset; + + spinlock_t wq_lock; + /* Per-engine counts of GuC submissions */ + uint64_t submissions[I915_NUM_ENGINES]; +}; + +int i915_guc_submission_init(struct drm_i915_private *dev_priv); +int i915_guc_submission_enable(struct drm_i915_private *dev_priv); +void i915_guc_submission_disable(struct drm_i915_private *dev_priv); +void i915_guc_submission_fini(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index e7875277ba97..770bac462441 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -24,6 +24,7 @@ #include "i915_drv.h" #include "intel_uc.h" +#include "i915_guc_submission.h" #include /* Reset GuC providing us with fresh state for both GuC and HuC. diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 86ae5070185e..2f741a97007e 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -33,46 +33,6 @@ #include "i915_vma.h" #include "intel_huc.h" -/* - * This structure primarily describes the GEM object shared with the GuC. - * The specs sometimes refer to this object as a "GuC context", but we use - * the term "client" to avoid confusion with hardware contexts. This - * GEM object is held for the entire lifetime of our interaction with - * the GuC, being allocated before the GuC is loaded with its firmware. - * Because there's no way to update the address used by the GuC after - * initialisation, the shared object must stay pinned into the GGTT as - * long as the GuC is in use. We also keep the first page (only) mapped - * into kernel address space, as it includes shared data that must be - * updated on every request submission. - * - * The single GEM object described here is actually made up of several - * separate areas, as far as the GuC is concerned. The first page (kept - * kmap'd) includes the "process descriptor" which holds sequence data for - * the doorbell, and one cacheline which actually *is* the doorbell; a - * write to this will "ring the doorbell" (i.e. send an interrupt to the - * GuC). The subsequent pages of the client object constitute the work - * queue (a circular array of work items), again described in the process - * descriptor. Work queue pages are mapped momentarily as required. - */ -struct i915_guc_client { - struct i915_vma *vma; - void *vaddr; - struct i915_gem_context *owner; - struct intel_guc *guc; - - uint32_t engines; /* bitmap of (host) engine ids */ - uint32_t priority; - u32 stage_id; - uint32_t proc_desc_offset; - - u16 doorbell_id; - unsigned long doorbell_offset; - - spinlock_t wq_lock; - /* Per-engine counts of GuC submissions */ - uint64_t submissions[I915_NUM_ENGINES]; -}; - struct intel_guc { struct intel_uc_fw fw; struct intel_guc_log log; @@ -141,11 +101,6 @@ int intel_guc_suspend(struct drm_i915_private *dev_priv); int intel_guc_resume(struct drm_i915_private *dev_priv); u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); -/* i915_guc_submission.c */ -int i915_guc_submission_init(struct drm_i915_private *dev_priv); -int i915_guc_submission_enable(struct drm_i915_private *dev_priv); -void i915_guc_submission_disable(struct drm_i915_private *dev_priv); -void i915_guc_submission_fini(struct drm_i915_private *dev_priv); struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); static inline u32 guc_ggtt_offset(struct i915_vma *vma) From 9bf384c5f21235d039bccf5d6e497c2c27986367 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 4 Oct 2017 18:13:41 +0000 Subject: [PATCH 048/145] drm/i915/guc: Move GuC core definitions into dedicated files Move GuC core definitions into dedicated files as we want to keep GuC specific code in separated files. v2: move all functions in single patch (Joonas) fix old checkpatch issues (Sagar) v3: rebased Signed-off-by: Michal Wajdeczko Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Sagar Arun Kamble Reviewed-by: Sagar Arun Kamble #1 Reviewed-by: Joonas Lahtinen Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171004181343.66348-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_guc_submission.c | 94 -------- drivers/gpu/drm/i915/intel_guc.c | 264 +++++++++++++++++++++ drivers/gpu/drm/i915/intel_guc.h | 109 +++++++++ drivers/gpu/drm/i915/intel_uc.c | 146 +----------- drivers/gpu/drm/i915/intel_uc.h | 78 +----- 6 files changed, 377 insertions(+), 315 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_guc.c create mode 100644 drivers/gpu/drm/i915/intel_guc.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 4850f260aead..51d0d2929a4b 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -60,6 +60,7 @@ i915-y += i915_cmd_parser.o \ # general-purpose microcontroller (GuC) support i915-y += intel_uc.o \ intel_uc_fw.o \ + intel_guc.o \ intel_guc_ct.o \ intel_guc_log.o \ intel_guc_loader.o \ diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 97dfe962547e..7460ab4a214c 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -644,48 +644,6 @@ static void i915_guc_irq_handler(unsigned long data) * path of i915_guc_submit() above. */ -/** - * intel_guc_allocate_vma() - Allocate a GGTT VMA for GuC usage - * @guc: the guc - * @size: size of area to allocate (both virtual space and memory) - * - * This is a wrapper to create an object for use with the GuC. In order to - * use it inside the GuC, an object needs to be pinned lifetime, so we allocate - * both some backing storage and a range inside the Global GTT. We must pin - * it in the GGTT somewhere other than than [0, GUC_WOPCM_TOP) because that - * range is reserved inside GuC. - * - * Return: A i915_vma if successful, otherwise an ERR_PTR. - */ -struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - int ret; - - obj = i915_gem_object_create(dev_priv, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); - if (IS_ERR(vma)) - goto err; - - ret = i915_vma_pin(vma, 0, PAGE_SIZE, - PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP); - if (ret) { - vma = ERR_PTR(ret); - goto err; - } - - return vma; - -err: - i915_gem_object_put(obj); - return vma; -} - /* Check that a doorbell register is in the expected state */ static bool doorbell_ok(struct intel_guc *guc, u16 db_id) { @@ -1213,55 +1171,3 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv) guc_client_free(guc->execbuf_client); guc->execbuf_client = NULL; } - -/** - * intel_guc_suspend() - notify GuC entering suspend state - * @dev_priv: i915 device private - */ -int intel_guc_suspend(struct drm_i915_private *dev_priv) -{ - struct intel_guc *guc = &dev_priv->guc; - struct i915_gem_context *ctx; - u32 data[3]; - - if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) - return 0; - - gen9_disable_guc_interrupts(dev_priv); - - ctx = dev_priv->kernel_context; - - data[0] = INTEL_GUC_ACTION_ENTER_S_STATE; - /* any value greater than GUC_POWER_D0 */ - data[1] = GUC_POWER_D1; - /* first page is shared data with GuC */ - data[2] = guc_ggtt_offset(ctx->engine[RCS].state) + LRC_GUCSHR_PN * PAGE_SIZE; - - return intel_guc_send(guc, data, ARRAY_SIZE(data)); -} - -/** - * intel_guc_resume() - notify GuC resuming from suspend state - * @dev_priv: i915 device private - */ -int intel_guc_resume(struct drm_i915_private *dev_priv) -{ - struct intel_guc *guc = &dev_priv->guc; - struct i915_gem_context *ctx; - u32 data[3]; - - if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) - return 0; - - if (i915_modparams.guc_log_level >= 0) - gen9_enable_guc_interrupts(dev_priv); - - ctx = dev_priv->kernel_context; - - data[0] = INTEL_GUC_ACTION_EXIT_S_STATE; - data[1] = GUC_POWER_D0; - /* first page is shared data with GuC */ - data[2] = guc_ggtt_offset(ctx->engine[RCS].state) + LRC_GUCSHR_PN * PAGE_SIZE; - - return intel_guc_send(guc, data, ARRAY_SIZE(data)); -} diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c new file mode 100644 index 000000000000..bbe4c328e9fd --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -0,0 +1,264 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "intel_guc.h" +#include "i915_drv.h" + +static void gen8_guc_raise_irq(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); +} + +static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i) +{ + GEM_BUG_ON(!guc->send_regs.base); + GEM_BUG_ON(!guc->send_regs.count); + GEM_BUG_ON(i >= guc->send_regs.count); + + return _MMIO(guc->send_regs.base + 4 * i); +} + +void intel_guc_init_send_regs(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + enum forcewake_domains fw_domains = 0; + unsigned int i; + + guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0)); + guc->send_regs.count = SOFT_SCRATCH_COUNT - 1; + + for (i = 0; i < guc->send_regs.count; i++) { + fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, + guc_send_reg(guc, i), + FW_REG_READ | FW_REG_WRITE); + } + guc->send_regs.fw_domains = fw_domains; +} + +void intel_guc_init_early(struct intel_guc *guc) +{ + intel_guc_ct_init_early(&guc->ct); + + mutex_init(&guc->send_mutex); + guc->send = intel_guc_send_nop; + guc->notify = gen8_guc_raise_irq; +} + +int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len) +{ + WARN(1, "Unexpected send: action=%#x\n", *action); + return -ENODEV; +} + +/* + * This function implements the MMIO based host to GuC interface. + */ +int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + u32 status; + int i; + int ret; + + GEM_BUG_ON(!len); + GEM_BUG_ON(len > guc->send_regs.count); + + /* If CT is available, we expect to use MMIO only during init/fini */ + GEM_BUG_ON(HAS_GUC_CT(dev_priv) && + *action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER && + *action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER); + + mutex_lock(&guc->send_mutex); + intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains); + + for (i = 0; i < len; i++) + I915_WRITE(guc_send_reg(guc, i), action[i]); + + POSTING_READ(guc_send_reg(guc, i - 1)); + + intel_guc_notify(guc); + + /* + * No GuC command should ever take longer than 10ms. + * Fast commands should still complete in 10us. + */ + ret = __intel_wait_for_register_fw(dev_priv, + guc_send_reg(guc, 0), + INTEL_GUC_RECV_MASK, + INTEL_GUC_RECV_MASK, + 10, 10, &status); + if (status != INTEL_GUC_STATUS_SUCCESS) { + /* + * Either the GuC explicitly returned an error (which + * we convert to -EIO here) or no response at all was + * received within the timeout limit (-ETIMEDOUT) + */ + if (ret != -ETIMEDOUT) + ret = -EIO; + + DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;" + " ret=%d status=0x%08X response=0x%08X\n", + action[0], ret, status, I915_READ(SOFT_SCRATCH(15))); + } + + intel_uncore_forcewake_put(dev_priv, guc->send_regs.fw_domains); + mutex_unlock(&guc->send_mutex); + + return ret; +} + +int intel_guc_sample_forcewake(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + u32 action[2]; + + action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE; + /* WaRsDisableCoarsePowerGating:skl,bxt */ + if (!intel_enable_rc6() || NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) + action[1] = 0; + else + /* bit 0 and 1 are for Render and Media domain separately */ + action[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +/** + * intel_guc_auth_huc() - Send action to GuC to authenticate HuC ucode + * @guc: intel_guc structure + * @rsa_offset: rsa offset w.r.t ggtt base of huc vma + * + * Triggers a HuC firmware authentication request to the GuC via intel_guc_send + * INTEL_GUC_ACTION_AUTHENTICATE_HUC interface. This function is invoked by + * intel_huc_auth(). + * + * Return: non-zero code on error + */ +int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset) +{ + u32 action[] = { + INTEL_GUC_ACTION_AUTHENTICATE_HUC, + rsa_offset + }; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +/** + * intel_guc_suspend() - notify GuC entering suspend state + * @dev_priv: i915 device private + */ +int intel_guc_suspend(struct drm_i915_private *dev_priv) +{ + struct intel_guc *guc = &dev_priv->guc; + struct i915_gem_context *ctx; + u32 data[3]; + + if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) + return 0; + + gen9_disable_guc_interrupts(dev_priv); + + ctx = dev_priv->kernel_context; + + data[0] = INTEL_GUC_ACTION_ENTER_S_STATE; + /* any value greater than GUC_POWER_D0 */ + data[1] = GUC_POWER_D1; + /* first page is shared data with GuC */ + data[2] = guc_ggtt_offset(ctx->engine[RCS].state) + + LRC_GUCSHR_PN * PAGE_SIZE; + + return intel_guc_send(guc, data, ARRAY_SIZE(data)); +} + +/** + * intel_guc_resume() - notify GuC resuming from suspend state + * @dev_priv: i915 device private + */ +int intel_guc_resume(struct drm_i915_private *dev_priv) +{ + struct intel_guc *guc = &dev_priv->guc; + struct i915_gem_context *ctx; + u32 data[3]; + + if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) + return 0; + + if (i915_modparams.guc_log_level >= 0) + gen9_enable_guc_interrupts(dev_priv); + + ctx = dev_priv->kernel_context; + + data[0] = INTEL_GUC_ACTION_EXIT_S_STATE; + data[1] = GUC_POWER_D0; + /* first page is shared data with GuC */ + data[2] = guc_ggtt_offset(ctx->engine[RCS].state) + + LRC_GUCSHR_PN * PAGE_SIZE; + + return intel_guc_send(guc, data, ARRAY_SIZE(data)); +} + +/** + * intel_guc_allocate_vma() - Allocate a GGTT VMA for GuC usage + * @guc: the guc + * @size: size of area to allocate (both virtual space and memory) + * + * This is a wrapper to create an object for use with the GuC. In order to + * use it inside the GuC, an object needs to be pinned lifetime, so we allocate + * both some backing storage and a range inside the Global GTT. We must pin + * it in the GGTT somewhere other than than [0, GUC_WOPCM_TOP) because that + * range is reserved inside GuC. + * + * Return: A i915_vma if successful, otherwise an ERR_PTR. + */ +struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int ret; + + obj = i915_gem_object_create(dev_priv, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); + if (IS_ERR(vma)) + goto err; + + ret = i915_vma_pin(vma, 0, PAGE_SIZE, + PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + if (ret) { + vma = ERR_PTR(ret); + goto err; + } + + return vma; + +err: + i915_gem_object_put(obj); + return vma; +} diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h new file mode 100644 index 000000000000..094d64980882 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -0,0 +1,109 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_GUC_H_ +#define _INTEL_GUC_H_ + +#include "intel_uncore.h" +#include "intel_guc_fwif.h" +#include "intel_guc_ct.h" +#include "intel_guc_log.h" +#include "intel_uc_fw.h" +#include "i915_guc_reg.h" +#include "i915_vma.h" + +struct intel_guc { + struct intel_uc_fw fw; + struct intel_guc_log log; + struct intel_guc_ct ct; + + /* Log snapshot if GuC errors during load */ + struct drm_i915_gem_object *load_err_log; + + /* intel_guc_recv interrupt related state */ + bool interrupts_enabled; + + struct i915_vma *ads_vma; + struct i915_vma *stage_desc_pool; + void *stage_desc_pool_vaddr; + struct ida stage_ids; + + struct i915_guc_client *execbuf_client; + + DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); + uint32_t db_cacheline; /* Cyclic counter mod pagesize */ + + /* GuC's FW specific registers used in MMIO send */ + struct { + u32 base; + unsigned int count; + enum forcewake_domains fw_domains; + } send_regs; + + /* To serialize the intel_guc_send actions */ + struct mutex send_mutex; + + /* GuC's FW specific send function */ + int (*send)(struct intel_guc *guc, const u32 *data, u32 len); + + /* GuC's FW specific notify function */ + void (*notify)(struct intel_guc *guc); +}; + +static +inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) +{ + return guc->send(guc, action, len); +} + +static inline void intel_guc_notify(struct intel_guc *guc) +{ + guc->notify(guc); +} + +static inline u32 guc_ggtt_offset(struct i915_vma *vma) +{ + u32 offset = i915_ggtt_offset(vma); + + GEM_BUG_ON(offset < GUC_WOPCM_TOP); + GEM_BUG_ON(range_overflows_t(u64, offset, vma->size, GUC_GGTT_TOP)); + + return offset; +} + +void intel_guc_init_early(struct intel_guc *guc); +void intel_guc_init_send_regs(struct intel_guc *guc); +int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); +int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); +int intel_guc_sample_forcewake(struct intel_guc *guc); +int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset); +int intel_guc_suspend(struct drm_i915_private *dev_priv); +int intel_guc_resume(struct drm_i915_private *dev_priv); +struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); + +int intel_guc_select_fw(struct intel_guc *guc); +int intel_guc_init_hw(struct intel_guc *guc); +u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 770bac462441..cb9f13f54a80 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -82,25 +82,9 @@ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv) i915_modparams.enable_guc_submission = HAS_GUC_SCHED(dev_priv); } -static void gen8_guc_raise_irq(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - - I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); -} - -static void guc_init_early(struct intel_guc *guc) -{ - intel_guc_ct_init_early(&guc->ct); - - mutex_init(&guc->send_mutex); - guc->send = intel_guc_send_nop; - guc->notify = gen8_guc_raise_irq; -} - void intel_uc_init_early(struct drm_i915_private *dev_priv) { - guc_init_early(&dev_priv->guc); + intel_guc_init_early(&dev_priv->guc); } void intel_uc_init_fw(struct drm_i915_private *dev_priv) @@ -115,32 +99,6 @@ void intel_uc_fini_fw(struct drm_i915_private *dev_priv) intel_uc_fw_fini(&dev_priv->huc.fw); } -static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i) -{ - GEM_BUG_ON(!guc->send_regs.base); - GEM_BUG_ON(!guc->send_regs.count); - GEM_BUG_ON(i >= guc->send_regs.count); - - return _MMIO(guc->send_regs.base + 4 * i); -} - -static void guc_init_send_regs(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - enum forcewake_domains fw_domains = 0; - unsigned int i; - - guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0)); - guc->send_regs.count = SOFT_SCRATCH_COUNT - 1; - - for (i = 0; i < guc->send_regs.count; i++) { - fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, - guc_send_reg(guc, i), - FW_REG_READ | FW_REG_WRITE); - } - guc->send_regs.fw_domains = fw_domains; -} - /** * intel_uc_init_mmio - setup uC MMIO access * @@ -151,7 +109,7 @@ static void guc_init_send_regs(struct intel_guc *guc) */ void intel_uc_init_mmio(struct drm_i915_private *dev_priv) { - guc_init_send_regs(&dev_priv->guc); + intel_guc_init_send_regs(&dev_priv->guc); } static void guc_capture_load_err_log(struct intel_guc *guc) @@ -192,27 +150,6 @@ static void guc_disable_communication(struct intel_guc *guc) guc->send = intel_guc_send_nop; } -/** - * intel_guc_auth_huc() - Send action to GuC to authenticate HuC ucode - * @guc: intel_guc structure - * @rsa_offset: rsa offset w.r.t ggtt base of huc vma - * - * Triggers a HuC firmware authentication request to the GuC via intel_guc_send - * INTEL_GUC_ACTION_AUTHENTICATE_HUC interface. This function is invoked by - * intel_huc_auth(). - * - * Return: non-zero code on error - */ -int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset) -{ - u32 action[] = { - INTEL_GUC_ACTION_AUTHENTICATE_HUC, - rsa_offset - }; - - return intel_guc_send(guc, action, ARRAY_SIZE(action)); -} - int intel_uc_init_hw(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; @@ -344,82 +281,3 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv) i915_ggtt_disable_guc(dev_priv); } - -int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len) -{ - WARN(1, "Unexpected send: action=%#x\n", *action); - return -ENODEV; -} - -/* - * This function implements the MMIO based host to GuC interface. - */ -int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - u32 status; - int i; - int ret; - - GEM_BUG_ON(!len); - GEM_BUG_ON(len > guc->send_regs.count); - - /* If CT is available, we expect to use MMIO only during init/fini */ - GEM_BUG_ON(HAS_GUC_CT(dev_priv) && - *action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER && - *action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER); - - mutex_lock(&guc->send_mutex); - intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains); - - for (i = 0; i < len; i++) - I915_WRITE(guc_send_reg(guc, i), action[i]); - - POSTING_READ(guc_send_reg(guc, i - 1)); - - intel_guc_notify(guc); - - /* - * No GuC command should ever take longer than 10ms. - * Fast commands should still complete in 10us. - */ - ret = __intel_wait_for_register_fw(dev_priv, - guc_send_reg(guc, 0), - INTEL_GUC_RECV_MASK, - INTEL_GUC_RECV_MASK, - 10, 10, &status); - if (status != INTEL_GUC_STATUS_SUCCESS) { - /* - * Either the GuC explicitly returned an error (which - * we convert to -EIO here) or no response at all was - * received within the timeout limit (-ETIMEDOUT) - */ - if (ret != -ETIMEDOUT) - ret = -EIO; - - DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;" - " ret=%d status=0x%08X response=0x%08X\n", - action[0], ret, status, I915_READ(SOFT_SCRATCH(15))); - } - - intel_uncore_forcewake_put(dev_priv, guc->send_regs.fw_domains); - mutex_unlock(&guc->send_mutex); - - return ret; -} - -int intel_guc_sample_forcewake(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - u32 action[2]; - - action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE; - /* WaRsDisableCoarsePowerGating:skl,bxt */ - if (!intel_enable_rc6() || NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) - action[1] = 0; - else - /* bit 0 and 1 are for Render and Media domain separately */ - action[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA; - - return intel_guc_send(guc, action, ARRAY_SIZE(action)); -} diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 2f741a97007e..e18d3bb02088 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -24,54 +24,9 @@ #ifndef _INTEL_UC_H_ #define _INTEL_UC_H_ -#include "intel_uc_fw.h" -#include "intel_guc_fwif.h" -#include "i915_guc_reg.h" -#include "intel_ringbuffer.h" -#include "intel_guc_ct.h" -#include "intel_guc_log.h" -#include "i915_vma.h" +#include "intel_guc.h" #include "intel_huc.h" -struct intel_guc { - struct intel_uc_fw fw; - struct intel_guc_log log; - struct intel_guc_ct ct; - - /* Log snapshot if GuC errors during load */ - struct drm_i915_gem_object *load_err_log; - - /* intel_guc_recv interrupt related state */ - bool interrupts_enabled; - - struct i915_vma *ads_vma; - struct i915_vma *stage_desc_pool; - void *stage_desc_pool_vaddr; - struct ida stage_ids; - - struct i915_guc_client *execbuf_client; - - DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); - uint32_t db_cacheline; /* Cyclic counter mod pagesize */ - - /* GuC's FW specific registers used in MMIO send */ - struct { - u32 base; - unsigned int count; - enum forcewake_domains fw_domains; - } send_regs; - - /* To serialize the intel_guc_send actions */ - struct mutex send_mutex; - - /* GuC's FW specific send function */ - int (*send)(struct intel_guc *guc, const u32 *data, u32 len); - - /* GuC's FW specific notify function */ - void (*notify)(struct intel_guc *guc); -}; - -/* intel_uc.c */ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv); void intel_uc_init_early(struct drm_i915_private *dev_priv); void intel_uc_init_mmio(struct drm_i915_private *dev_priv); @@ -79,36 +34,5 @@ void intel_uc_init_fw(struct drm_i915_private *dev_priv); void intel_uc_fini_fw(struct drm_i915_private *dev_priv); int intel_uc_init_hw(struct drm_i915_private *dev_priv); void intel_uc_fini_hw(struct drm_i915_private *dev_priv); -int intel_guc_sample_forcewake(struct intel_guc *guc); -int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); -int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); -int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset); - -static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) -{ - return guc->send(guc, action, len); -} - -static inline void intel_guc_notify(struct intel_guc *guc) -{ - guc->notify(guc); -} - -/* intel_guc_loader.c */ -int intel_guc_select_fw(struct intel_guc *guc); -int intel_guc_init_hw(struct intel_guc *guc); -int intel_guc_suspend(struct drm_i915_private *dev_priv); -int intel_guc_resume(struct drm_i915_private *dev_priv); -u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); - -struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); - -static inline u32 guc_ggtt_offset(struct i915_vma *vma) -{ - u32 offset = i915_ggtt_offset(vma); - GEM_BUG_ON(offset < GUC_WOPCM_TOP); - GEM_BUG_ON(range_overflows_t(u64, offset, vma->size, GUC_GGTT_TOP)); - return offset; -} #endif From ddf79d88f3935604f3b7e3031a2325b7682a0518 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 4 Oct 2017 18:13:42 +0000 Subject: [PATCH 049/145] drm/i915/uc: Fix includes order Fix includes order and make sure we only include required headers. Suggested-by: Chris Wilson Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171004181343.66348-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/intel_uc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index cb9f13f54a80..7b938e822fde 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -22,10 +22,9 @@ * */ -#include "i915_drv.h" #include "intel_uc.h" +#include "i915_drv.h" #include "i915_guc_submission.h" -#include /* Reset GuC providing us with fresh state for both GuC and HuC. */ From 959a3b6f57996af1c20232abeb7b0efd355134cc Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 4 Oct 2017 18:13:43 +0000 Subject: [PATCH 050/145] drm/i915/uc: Unify initialization of the uC firmware helper Unify initialization of the uC firmware helper as we want to maximize code reuse. Signed-off-by: Michal Wajdeczko Cc: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Joonas Lahtinen Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171004181343.66348-6-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/intel_guc_loader.c | 5 +---- drivers/gpu/drm/i915/intel_huc.c | 5 +---- drivers/gpu/drm/i915/intel_uc_fw.h | 9 +++++++++ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index c9e25be4db40..c7a800a3798d 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -386,10 +386,7 @@ int intel_guc_select_fw(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); - guc->fw.path = NULL; - guc->fw.fetch_status = INTEL_UC_FIRMWARE_NONE; - guc->fw.load_status = INTEL_UC_FIRMWARE_NONE; - guc->fw.type = INTEL_UC_FW_TYPE_GUC; + intel_uc_fw_init(&guc->fw, INTEL_UC_FW_TYPE_GUC); if (i915_modparams.guc_firmware_path) { guc->fw.path = i915_modparams.guc_firmware_path; diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 8b4b53525422..3f796fed35af 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -150,10 +150,7 @@ void intel_huc_select_fw(struct intel_huc *huc) { struct drm_i915_private *dev_priv = huc_to_i915(huc); - huc->fw.path = NULL; - huc->fw.fetch_status = INTEL_UC_FIRMWARE_NONE; - huc->fw.load_status = INTEL_UC_FIRMWARE_NONE; - huc->fw.type = INTEL_UC_FW_TYPE_HUC; + intel_uc_fw_init(&huc->fw, INTEL_UC_FW_TYPE_HUC); if (i915_modparams.huc_firmware_path) { huc->fw.path = i915_modparams.huc_firmware_path; diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h index 849b2ce685c0..fcaf59718300 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/intel_uc_fw.h @@ -91,6 +91,15 @@ static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) return "uC"; } +static inline +void intel_uc_fw_init(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type) +{ + uc_fw->path = NULL; + uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; + uc_fw->load_status = INTEL_UC_FIRMWARE_NONE; + uc_fw->type = type; +} + void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, struct intel_uc_fw *uc_fw); void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); From 51c4fa6903f958dff1221b2986329b9c6de9db57 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 5 Oct 2017 13:52:10 +0300 Subject: [PATCH 051/145] drm/i915: push DDI CRT underrun reporting on enable to encoder CRT being the only PCH encoder, we can simplify the crtc enable by pushing the PCH encoder specific parts to CRT encoder. v2: add separate hsw_enable_crt (Daniel), rebase Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/b117fb524fc571837c83292194fc2ad35e588ec9.1507200657.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_crt.c | 51 +++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_display.c | 14 +------- 2 files changed, 51 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 954070255b4d..6d57c92ac999 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -227,6 +227,52 @@ static void hsw_post_disable_crt(struct intel_encoder *encoder, intel_ddi_fdi_post_disable(encoder, old_crtc_state, old_conn_state); } +static void hsw_pre_pll_enable_crt(struct intel_encoder *encoder, + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) +{ + struct drm_crtc *crtc = pipe_config->base.crtc; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + + WARN_ON(!intel_crtc->config->has_pch_encoder); + + intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); +} + +static void hsw_pre_enable_crt(struct intel_encoder *encoder, + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) +{ + struct drm_crtc *crtc = pipe_config->base.crtc; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int pipe = intel_crtc->pipe; + + WARN_ON(!intel_crtc->config->has_pch_encoder); + + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); +} + +static void hsw_enable_crt(struct intel_encoder *encoder, + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) +{ + struct drm_crtc *crtc = pipe_config->base.crtc; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int pipe = intel_crtc->pipe; + + WARN_ON(!intel_crtc->config->has_pch_encoder); + + intel_crt_set_dpms(encoder, pipe_config, DRM_MODE_DPMS_ON); + + intel_wait_for_vblank(dev_priv, pipe); + intel_wait_for_vblank(dev_priv, pipe); + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); + intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); +} + static void intel_enable_crt(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) @@ -897,7 +943,6 @@ void intel_crt_init(struct drm_i915_private *dev_priv) } else { crt->base.disable = intel_disable_crt; } - crt->base.enable = intel_enable_crt; if (I915_HAS_HOTPLUG(dev_priv) && !dmi_check_system(intel_spurious_crt_detect)) crt->base.hpd_pin = HPD_CRT; @@ -905,11 +950,15 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->base.port = PORT_E; crt->base.get_config = hsw_crt_get_config; crt->base.get_hw_state = intel_ddi_get_hw_state; + crt->base.pre_pll_enable = hsw_pre_pll_enable_crt; + crt->base.pre_enable = hsw_pre_enable_crt; + crt->base.enable = hsw_enable_crt; crt->base.post_disable = hsw_post_disable_crt; } else { crt->base.port = PORT_NONE; crt->base.get_config = intel_crt_get_config; crt->base.get_hw_state = intel_crt_get_hw_state; + crt->base.enable = intel_enable_crt; } intel_connector->get_hw_state = intel_connector_get_hw_state; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index cdb2e25a577c..6f275c930fd5 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5488,9 +5488,6 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, if (WARN_ON(intel_crtc->active)) return; - if (intel_crtc->config->has_pch_encoder) - intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); - intel_encoders_pre_pll_enable(crtc, pipe_config, old_state); if (intel_crtc->config->shared_dpll) @@ -5524,9 +5521,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_crtc->active = true; - if (intel_crtc->config->has_pch_encoder) - intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); - else + if (!intel_crtc->config->has_pch_encoder) intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); intel_encoders_pre_enable(crtc, pipe_config, old_state); @@ -5581,13 +5576,6 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, false); } - if (intel_crtc->config->has_pch_encoder) { - intel_wait_for_vblank(dev_priv, pipe); - intel_wait_for_vblank(dev_priv, pipe); - intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); - intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); - } - /* If we change the relative order between pipe/planes enabling, we need * to change the workaround. */ hsw_workaround_pipe = pipe_config->hsw_workaround_pipe; From 3daa3cee6ebc3e55da5ad70d61482029905f4b7d Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 5 Oct 2017 13:52:11 +0300 Subject: [PATCH 052/145] drm/i915: push DDI CRT underrun reporting on disable to encoder CRT being the only PCH encoder, we can simplify the crtc disable by pushing the PCH encoder specific parts to CRT encoder. v2: add hsw_disable_crt (Daniel), rebase Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1ecbbb946f7b3782ee3d97f0ec2fe4758c349311.1507200657.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_crt.c | 18 ++++++++++++++++++ drivers/gpu/drm/i915/intel_display.c | 6 ------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 6d57c92ac999..b43e8e16da37 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -213,6 +213,19 @@ static void pch_post_disable_crt(struct intel_encoder *encoder, intel_disable_crt(encoder, old_crtc_state, old_conn_state); } +static void hsw_disable_crt(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + struct drm_crtc *crtc = old_crtc_state->base.crtc; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + + WARN_ON(!intel_crtc->config->has_pch_encoder); + + intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); +} + static void hsw_post_disable_crt(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) @@ -225,6 +238,10 @@ static void hsw_post_disable_crt(struct intel_encoder *encoder, lpt_disable_iclkip(dev_priv); intel_ddi_fdi_post_disable(encoder, old_crtc_state, old_conn_state); + + WARN_ON(!old_crtc_state->has_pch_encoder); + + intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); } static void hsw_pre_pll_enable_crt(struct intel_encoder *encoder, @@ -953,6 +970,7 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->base.pre_pll_enable = hsw_pre_pll_enable_crt; crt->base.pre_enable = hsw_pre_enable_crt; crt->base.enable = hsw_enable_crt; + crt->base.disable = hsw_disable_crt; crt->base.post_disable = hsw_post_disable_crt; } else { crt->base.port = PORT_NONE; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6f275c930fd5..9058cdfb0649 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5669,9 +5669,6 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; - if (intel_crtc->config->has_pch_encoder) - intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); - intel_encoders_disable(crtc, old_crtc_state, old_state); drm_crtc_vblank_off(crtc); @@ -5696,9 +5693,6 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, intel_ddi_disable_pipe_clock(intel_crtc->config); intel_encoders_post_disable(crtc, old_crtc_state, old_state); - - if (old_crtc_state->has_pch_encoder) - intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); } static void i9xx_pfit_enable(struct intel_crtc *crtc) From 364a3fe1823549625f2f1ff1cb26733667052da7 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 5 Oct 2017 13:52:12 +0300 Subject: [PATCH 053/145] drm/i915: push DDI and DSI underrun reporting on enable to encoder Simplify CRTC enable. v2: Don't forget DSI (Daniel) Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1ad8f3ccaed99a5f03e3d53345221a25ad0be50f.1507200657.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 8 ++++++++ drivers/gpu/drm/i915/intel_display.c | 3 --- drivers/gpu/drm/i915/intel_dsi.c | 7 ++++++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index f7c91bb9d13e..ce4b7335a78a 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2213,8 +2213,16 @@ static void intel_ddi_pre_enable(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { + struct drm_crtc *crtc = pipe_config->base.crtc; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int pipe = intel_crtc->pipe; int type = encoder->type; + WARN_ON(intel_crtc->config->has_pch_encoder); + + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); + if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { intel_ddi_pre_enable_dp(encoder, pipe_config->port_clock, diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9058cdfb0649..b55944d8149b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5521,9 +5521,6 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_crtc->active = true; - if (!intel_crtc->config->has_pch_encoder) - intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); - intel_encoders_pre_enable(crtc, pipe_config, old_state); if (intel_crtc->config->has_pch_encoder) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 20a7b004ffd7..66bbedc5fa01 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -790,14 +790,19 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct drm_crtc *crtc = pipe_config->base.crtc; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int pipe = intel_crtc->pipe; enum port port; u32 val; bool glk_cold_boot = false; DRM_DEBUG_KMS("\n"); + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); + /* * The BIOS may leave the PLL in a wonky state where it doesn't * lock. It needs to be fully powered down to fix it. From 27d81c28eef1d44ddb95980e681d28807e9bce54 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 5 Oct 2017 13:52:13 +0300 Subject: [PATCH 054/145] drm/i915: push DDI FDI link training on enable to CRT encoder Reduce encoder specific checks from CRTC code. Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/6903136894ffcf47f7511900f3c8d53f760ef6b1.1507200657.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_crt.c | 2 ++ drivers/gpu/drm/i915/intel_display.c | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index b43e8e16da37..2abe556ccaf5 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -269,6 +269,8 @@ static void hsw_pre_enable_crt(struct intel_encoder *encoder, WARN_ON(!intel_crtc->config->has_pch_encoder); intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); + + dev_priv->display.fdi_link_train(intel_crtc, pipe_config); } static void hsw_enable_crt(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b55944d8149b..9f2bf3b3f759 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5523,9 +5523,6 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_encoders_pre_enable(crtc, pipe_config, old_state); - if (intel_crtc->config->has_pch_encoder) - dev_priv->display.fdi_link_train(intel_crtc, pipe_config); - if (!transcoder_is_dsi(cpu_transcoder)) intel_ddi_enable_pipe_clock(pipe_config); From c5ce4ef3282bd42fd54cf581591d54f2b0a10c3b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 5 Oct 2017 13:52:14 +0300 Subject: [PATCH 055/145] drm/i915/crt: clean up encoder hook assignment Only assign the hooks once instead of overwriting for DDI. Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/796b631d018fdd5aa2a3608c6a9b9fa196a50f76.1507200657.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_crt.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 2abe556ccaf5..668e8c3e791d 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -955,16 +955,11 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->base.power_domain = POWER_DOMAIN_PORT_CRT; - crt->base.compute_config = intel_crt_compute_config; - if (HAS_PCH_SPLIT(dev_priv)) { - crt->base.disable = pch_disable_crt; - crt->base.post_disable = pch_post_disable_crt; - } else { - crt->base.disable = intel_disable_crt; - } if (I915_HAS_HOTPLUG(dev_priv) && !dmi_check_system(intel_spurious_crt_detect)) crt->base.hpd_pin = HPD_CRT; + + crt->base.compute_config = intel_crt_compute_config; if (HAS_DDI(dev_priv)) { crt->base.port = PORT_E; crt->base.get_config = hsw_crt_get_config; @@ -975,6 +970,12 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->base.disable = hsw_disable_crt; crt->base.post_disable = hsw_post_disable_crt; } else { + if (HAS_PCH_SPLIT(dev_priv)) { + crt->base.disable = pch_disable_crt; + crt->base.post_disable = pch_post_disable_crt; + } else { + crt->base.disable = intel_disable_crt; + } crt->base.port = PORT_NONE; crt->base.get_config = intel_crt_get_config; crt->base.get_hw_state = intel_crt_get_hw_state; From 0c3767b28186c8129f2a2cfec06a93dcd6102391 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 5 Oct 2017 16:15:20 +0200 Subject: [PATCH 056/145] drm/i915: Use crtc_state_is_legacy_gamma in intel_color_check crtc_state_is_legacy_gamma also checks for CTM, which was missing from intel_color_check. By using the same condition for commit and check we reduce the chance of mismatches. This was spotted by KASAN while trying to rework kms_color igt test. [ 72.008660] ================================================================== [ 72.009326] BUG: KASAN: slab-out-of-bounds in bdw_load_gamma_lut.isra.3+0x15c/0x360 [i915] [ 72.009519] Read of size 2 at addr ffff880220216e50 by task kms_color/1158 [ 72.009900] CPU: 2 PID: 1158 Comm: kms_color Tainted: G U W 4.14.0-rc3-patser+ #5281 [ 72.009921] Hardware name: GIGABYTE GB-BKi3A-7100/MFLP3AP-00, BIOS F1 07/27/2016 [ 72.009941] Call Trace: [ 72.009968] dump_stack+0xc5/0x151 [ 72.009996] ? _atomic_dec_and_lock+0x10f/0x10f [ 72.010024] ? show_regs_print_info+0x3c/0x3c [ 72.010072] print_address_description+0x7f/0x240 [ 72.010108] kasan_report+0x216/0x370 [ 72.010308] ? bdw_load_gamma_lut.isra.3+0x15c/0x360 [i915] [ 72.010349] __asan_load2+0x74/0x80 [ 72.010552] bdw_load_gamma_lut.isra.3+0x15c/0x360 [i915] [ 72.010772] broadwell_load_luts+0x1f0/0x300 [i915] [ 72.010997] intel_color_load_luts+0x36/0x40 [i915] [ 72.011205] intel_begin_crtc_commit+0xa1/0x310 [i915] [ 72.011283] drm_atomic_helper_commit_planes_on_crtc+0xa6/0x320 [drm_kms_helper] [ 72.011316] ? wait_for_completion_io+0x460/0x460 [ 72.011524] intel_update_crtc+0xe3/0x100 [i915] [ 72.011720] skl_update_crtcs+0x360/0x3f0 [i915] [ 72.011945] ? intel_update_crtcs+0xf0/0xf0 [i915] [ 72.012010] ? drm_atomic_helper_wait_for_dependencies+0x3d9/0x400 [drm_kms_helper] [ 72.012231] intel_atomic_commit_tail+0x8db/0x1500 [i915] [ 72.012273] ? __lock_is_held+0x9c/0xc0 [ 72.012494] ? skl_update_crtcs+0x3f0/0x3f0 [i915] [ 72.012518] ? find_next_bit+0xb/0x10 [ 72.012544] ? cpumask_next+0x1a/0x20 [ 72.012745] ? i915_sw_fence_complete+0x9d/0xe0 [i915] [ 72.012938] ? __i915_sw_fence_complete+0x5d0/0x5d0 [i915] [ 72.013176] intel_atomic_commit+0x528/0x570 [i915] [ 72.013280] ? drm_atomic_get_property+0xc00/0xc00 [drm] [ 72.013466] ? intel_atomic_commit_tail+0x1500/0x1500 [i915] [ 72.013496] ? kmem_cache_alloc_trace+0x266/0x280 [ 72.013714] ? intel_atomic_commit_tail+0x1500/0x1500 [i915] [ 72.013812] drm_atomic_commit+0x77/0x80 [drm] [ 72.013911] set_property_atomic+0x14a/0x210 [drm] [ 72.014015] ? drm_object_property_get_value+0x70/0x70 [drm] [ 72.014080] ? mutex_unlock+0xd/0x10 [ 72.014292] ? intel_atomic_commit_tail+0x1500/0x1500 [i915] [ 72.014379] drm_mode_obj_set_property_ioctl+0x1cf/0x310 [drm] [ 72.014481] ? drm_mode_obj_find_prop_id+0xa0/0xa0 [drm] [ 72.014510] ? lock_release+0x6c0/0x6c0 [ 72.014602] ? drm_is_current_master+0x46/0x60 [drm] [ 72.014706] drm_ioctl_kernel+0x148/0x1d0 [drm] [ 72.014799] ? drm_mode_obj_find_prop_id+0xa0/0xa0 [drm] [ 72.014898] ? drm_ioctl_permit+0x100/0x100 [drm] [ 72.014936] ? kasan_check_write+0x14/0x20 [ 72.015039] drm_ioctl+0x441/0x660 [drm] [ 72.015129] ? drm_mode_obj_find_prop_id+0xa0/0xa0 [drm] [ 72.015235] ? drm_getstats+0x20/0x20 [drm] [ 72.015287] ? ___might_sleep+0x159/0x340 [ 72.015311] ? find_held_lock+0xcf/0xf0 [ 72.015341] ? __schedule_bug+0x110/0x110 [ 72.015405] do_vfs_ioctl+0xa88/0xb10 [ 72.015449] ? ioctl_preallocate+0x1a0/0x1a0 [ 72.015487] ? selinux_capable+0x20/0x20 [ 72.015525] ? rcu_dynticks_momentary_idle+0x40/0x40 [ 72.015607] SyS_ioctl+0x4e/0x80 [ 72.015647] entry_SYSCALL_64_fastpath+0x18/0xad [ 72.015670] RIP: 0033:0x7ff74a3d04d7 [ 72.015691] RSP: 002b:00007ffc594bec08 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 72.015734] RAX: ffffffffffffffda RBX: ffffffff8718f54a RCX: 00007ff74a3d04d7 [ 72.015756] RDX: 00007ffc594bec40 RSI: 00000000c01864ba RDI: 0000000000000003 [ 72.015777] RBP: ffff880211c0ff98 R08: 0000000000000086 R09: 0000000000000000 [ 72.015799] R10: 00007ff74a691b58 R11: 0000000000000246 R12: 0000000000000355 [ 72.015821] R13: 00000000ff00eb00 R14: 0000000000000a00 R15: 00007ff746082000 [ 72.015857] ? trace_hardirqs_off_caller+0xfa/0x110 Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171005141520.23990-1-maarten.lankhorst@linux.intel.com [mlankhorst: s/crtc_state_is_legacy/&_gamma/ (danvet)] Reviewed-by: Daniel Vetter Fixes: 82cf435b3134 ("drm/i915: Implement color management on bdw/skl/bxt/kbl") Cc: # v4.7+ --- drivers/gpu/drm/i915/intel_color.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index ff9ecd211abb..b8315bca852b 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -74,7 +74,7 @@ #define I9XX_CSC_COEFF_1_0 \ ((7 << 12) | I9XX_CSC_COEFF_FP(CTM_COEFF_1_0, 8)) -static bool crtc_state_is_legacy(struct drm_crtc_state *state) +static bool crtc_state_is_legacy_gamma(struct drm_crtc_state *state) { return !state->degamma_lut && !state->ctm && @@ -288,7 +288,7 @@ static void cherryview_load_csc_matrix(struct drm_crtc_state *state) } mode = (state->ctm ? CGM_PIPE_MODE_CSC : 0); - if (!crtc_state_is_legacy(state)) { + if (!crtc_state_is_legacy_gamma(state)) { mode |= (state->degamma_lut ? CGM_PIPE_MODE_DEGAMMA : 0) | (state->gamma_lut ? CGM_PIPE_MODE_GAMMA : 0); } @@ -469,7 +469,7 @@ static void broadwell_load_luts(struct drm_crtc_state *state) struct intel_crtc_state *intel_state = to_intel_crtc_state(state); enum pipe pipe = to_intel_crtc(state->crtc)->pipe; - if (crtc_state_is_legacy(state)) { + if (crtc_state_is_legacy_gamma(state)) { haswell_load_luts(state); return; } @@ -529,7 +529,7 @@ static void glk_load_luts(struct drm_crtc_state *state) glk_load_degamma_lut(state); - if (crtc_state_is_legacy(state)) { + if (crtc_state_is_legacy_gamma(state)) { haswell_load_luts(state); return; } @@ -551,7 +551,7 @@ static void cherryview_load_luts(struct drm_crtc_state *state) uint32_t i, lut_size; uint32_t word0, word1; - if (crtc_state_is_legacy(state)) { + if (crtc_state_is_legacy_gamma(state)) { /* Turn off degamma/gamma on CGM block. */ I915_WRITE(CGM_PIPE_MODE(pipe), (state->ctm ? CGM_PIPE_MODE_CSC : 0)); @@ -632,12 +632,10 @@ int intel_color_check(struct drm_crtc *crtc, return 0; /* - * We also allow no degamma lut and a gamma lut at the legacy + * We also allow no degamma lut/ctm and a gamma lut at the legacy * size (256 entries). */ - if (!crtc_state->degamma_lut && - crtc_state->gamma_lut && - crtc_state->gamma_lut->length == LEGACY_LUT_LENGTH) + if (crtc_state_is_legacy_gamma(crtc_state)) return 0; return -EINVAL; From 822a4b673284672af697ccd66e8795f8a712a90d Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Fri, 6 Oct 2017 13:45:59 +0300 Subject: [PATCH 057/145] drm/i915: Don't use BIT() in UAPI section Lets not introduce BIT() macro requirement for UAPI for now. Fixes: 3fd3a6ffe279 ("drm/i915: Simplify i915_reg_read_ioctl") Signed-off-by: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006104559.17312-1-joonas.lahtinen@linux.intel.com --- include/uapi/drm/i915_drm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 7266b53191ee..125bde7d9504 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1318,7 +1318,7 @@ struct drm_i915_reg_read { * be specified */ __u64 offset; -#define I915_REG_READ_8B_WA BIT(0) +#define I915_REG_READ_8B_WA (1ul << 0) __u64 val; /* Return value */ }; From faf654864b25f4ca4efd416145d37b794c0b805f Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Fri, 6 Oct 2017 11:49:40 +0300 Subject: [PATCH 058/145] drm/i915: Unify uC variable types to avoid flooding checkpatch.pl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the code motion mostly done, convert all the uC code away from uint??_t at once (only a couple dozen variables), so that reading the checkpatch.pl output should actually pinpoint if a new uint??_t was accidentally introduced. v2: - Include intel_uc_fw.h too (Sagar) Signed-off-by: Joonas Lahtinen Cc: Michal Wajdeczko Cc: Chris Wilson Cc: Michał Winiarski Cc: Sagar Arun Kamble Cc: Anusha Srivatsa Cc: Sujaritha Sundaresan Reviewed-by: Sagar Arun Kamble Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20171006084940.15910-1-joonas.lahtinen@linux.intel.com --- drivers/gpu/drm/i915/i915_guc_submission.c | 10 ++--- drivers/gpu/drm/i915/i915_guc_submission.h | 8 ++-- drivers/gpu/drm/i915/intel_guc.h | 3 +- drivers/gpu/drm/i915/intel_guc_fwif.h | 48 +++++++++++----------- drivers/gpu/drm/i915/intel_guc_log.c | 3 +- drivers/gpu/drm/i915/intel_guc_log.h | 2 +- drivers/gpu/drm/i915/intel_uc_fw.h | 20 ++++----- 7 files changed, 48 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 7460ab4a214c..a2fad82532e4 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -338,7 +338,7 @@ static void guc_stage_desc_init(struct intel_guc *guc, for_each_engine_masked(engine, dev_priv, client->engines, tmp) { struct intel_context *ce = &ctx->engine[engine->id]; - uint32_t guc_engine_id = engine->guc_id; + u32 guc_engine_id = engine->guc_id; struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id]; /* TODO: We have a design issue to be solved here. Only when we @@ -388,13 +388,13 @@ static void guc_stage_desc_init(struct intel_guc *guc, gfx_addr = guc_ggtt_offset(client->vma); desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + client->doorbell_offset; - desc->db_trigger_cpu = (uintptr_t)__get_doorbell(client); + desc->db_trigger_cpu = (u64)__get_doorbell(client); desc->db_trigger_uk = gfx_addr + client->doorbell_offset; desc->process_desc = gfx_addr + client->proc_desc_offset; desc->wq_addr = gfx_addr + GUC_DB_SIZE; desc->wq_size = GUC_WQ_SIZE; - desc->desc_private = (uintptr_t)client; + desc->desc_private = (u64)client; } static void guc_stage_desc_fini(struct intel_guc *guc, @@ -755,8 +755,8 @@ static int guc_init_doorbell_hw(struct intel_guc *guc) */ static struct i915_guc_client * guc_client_alloc(struct drm_i915_private *dev_priv, - uint32_t engines, - uint32_t priority, + u32 engines, + u32 priority, struct i915_gem_context *ctx) { struct i915_guc_client *client; diff --git a/drivers/gpu/drm/i915/i915_guc_submission.h b/drivers/gpu/drm/i915/i915_guc_submission.h index a7e61e6246ff..cb4353b59059 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.h +++ b/drivers/gpu/drm/i915/i915_guc_submission.h @@ -59,17 +59,17 @@ struct i915_guc_client { struct intel_guc *guc; /* bitmap of (host) engine ids */ - uint32_t engines; - uint32_t priority; + u32 engines; + u32 priority; u32 stage_id; - uint32_t proc_desc_offset; + u32 proc_desc_offset; u16 doorbell_id; unsigned long doorbell_offset; spinlock_t wq_lock; /* Per-engine counts of GuC submissions */ - uint64_t submissions[I915_NUM_ENGINES]; + u64 submissions[I915_NUM_ENGINES]; }; int i915_guc_submission_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 094d64980882..aa9a7b55be6e 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -52,7 +52,8 @@ struct intel_guc { struct i915_guc_client *execbuf_client; DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); - uint32_t db_cacheline; /* Cyclic counter mod pagesize */ + /* Cyclic counter mod pagesize */ + u32 db_cacheline; /* GuC's FW specific registers used in MMIO send */ struct { diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index 7eb6b4fa1d6f..1c0a2a3de121 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -178,49 +178,49 @@ */ struct uc_css_header { - uint32_t module_type; + u32 module_type; /* header_size includes all non-uCode bits, including css_header, rsa * key, modulus key and exponent data. */ - uint32_t header_size_dw; - uint32_t header_version; - uint32_t module_id; - uint32_t module_vendor; + u32 header_size_dw; + u32 header_version; + u32 module_id; + u32 module_vendor; union { struct { - uint8_t day; - uint8_t month; - uint16_t year; + u8 day; + u8 month; + u16 year; }; - uint32_t date; + u32 date; }; - uint32_t size_dw; /* uCode plus header_size_dw */ - uint32_t key_size_dw; - uint32_t modulus_size_dw; - uint32_t exponent_size_dw; + u32 size_dw; /* uCode plus header_size_dw */ + u32 key_size_dw; + u32 modulus_size_dw; + u32 exponent_size_dw; union { struct { - uint8_t hour; - uint8_t min; - uint16_t sec; + u8 hour; + u8 min; + u16 sec; }; - uint32_t time; + u32 time; }; char username[8]; char buildnumber[12]; union { struct { - uint32_t branch_client_version; - uint32_t sw_version; + u32 branch_client_version; + u32 sw_version; } guc; struct { - uint32_t sw_version; - uint32_t reserved; + u32 sw_version; + u32 reserved; } huc; }; - uint32_t prod_preprod_fw; - uint32_t reserved[12]; - uint32_t header_info; + u32 prod_preprod_fw; + u32 reserved[12]; + u32 header_info; } __packed; struct guc_doorbell_info { diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index b7317a13354a..76d3eb1e4614 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -528,7 +528,8 @@ int intel_guc_log_create(struct intel_guc *guc) { struct i915_vma *vma; unsigned long offset; - uint32_t size, flags; + u32 flags; + u32 size; int ret; GEM_BUG_ON(guc->log.vma); diff --git a/drivers/gpu/drm/i915/intel_guc_log.h b/drivers/gpu/drm/i915/intel_guc_log.h index b5ec374d9b15..f512cf79339b 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.h +++ b/drivers/gpu/drm/i915/intel_guc_log.h @@ -33,7 +33,7 @@ struct drm_i915_private; struct intel_guc; struct intel_guc_log { - uint32_t flags; + u32 flags; struct i915_vma *vma; /* The runtime stuff gets created only when GuC logging gets enabled */ struct { diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h index fcaf59718300..c3e9af4b9bf0 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/intel_uc_fw.h @@ -50,18 +50,18 @@ struct intel_uc_fw { enum intel_uc_fw_status fetch_status; enum intel_uc_fw_status load_status; - uint16_t major_ver_wanted; - uint16_t minor_ver_wanted; - uint16_t major_ver_found; - uint16_t minor_ver_found; + u16 major_ver_wanted; + u16 minor_ver_wanted; + u16 major_ver_found; + u16 minor_ver_found; enum intel_uc_fw_type type; - uint32_t header_size; - uint32_t header_offset; - uint32_t rsa_size; - uint32_t rsa_offset; - uint32_t ucode_size; - uint32_t ucode_offset; + u32 header_size; + u32 header_offset; + u32 rsa_size; + u32 rsa_offset; + u32 ucode_size; + u32 ucode_offset; }; static inline From ead92edbd67c5f51dda7ac90136fbd05464203b6 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 6 Oct 2017 09:02:09 +0000 Subject: [PATCH 059/145] drm/i915/huc: Fix includes in intel_huc.c Fix includes order and make sure we only include required headers. While here, make intel_huc.h header self-contained. Signed-off-by: Michal Wajdeczko Cc: Joonas Lahtinen Cc: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006090209.67852-1-michal.wajdeczko@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_huc.c | 6 ++++-- drivers/gpu/drm/i915/intel_huc.h | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 3f796fed35af..4b4cf56d29ad 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -21,9 +21,11 @@ * IN THE SOFTWARE. * */ -#include + +#include + +#include "intel_huc.h" #include "i915_drv.h" -#include "intel_uc.h" /** * DOC: HuC Firmware diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h index d58422b0c85f..aaa38b9e5817 100644 --- a/drivers/gpu/drm/i915/intel_huc.h +++ b/drivers/gpu/drm/i915/intel_huc.h @@ -25,6 +25,8 @@ #ifndef _INTEL_HUC_H_ #define _INTEL_HUC_H_ +#include "intel_uc_fw.h" + struct intel_huc { /* Generic uC firmware management */ struct intel_uc_fw fw; From bb8920f5bea53317c15d1c14f2704f2deaeb9ffd Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 6 Oct 2017 13:08:44 +0000 Subject: [PATCH 060/145] drm/i915: Fix pointer-to-int conversion Commit faf654864b25 ("drm/i915: Unify uC variable types to avoid flooding checkpatch.pl") breaks 32-bit kernel builds. Lets use cast helper to make compiler happy. v2: introduce ptr_to_u64 (Chris) Signed-off-by: Michal Wajdeczko Cc: Joonas Lahtinen Cc: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006130844.49012-1-michal.wajdeczko@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_guc_submission.c | 4 ++-- drivers/gpu/drm/i915/i915_utils.h | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index a2fad82532e4..31381a327347 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -388,13 +388,13 @@ static void guc_stage_desc_init(struct intel_guc *guc, gfx_addr = guc_ggtt_offset(client->vma); desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + client->doorbell_offset; - desc->db_trigger_cpu = (u64)__get_doorbell(client); + desc->db_trigger_cpu = ptr_to_u64(__get_doorbell(client)); desc->db_trigger_uk = gfx_addr + client->doorbell_offset; desc->process_desc = gfx_addr + client->proc_desc_offset; desc->wq_addr = gfx_addr + GUC_DB_SIZE; desc->wq_size = GUC_WQ_SIZE; - desc->desc_private = (u64)client; + desc->desc_private = ptr_to_u64(client); } static void guc_stage_desc_fini(struct intel_guc *guc, diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 12fc250b47b9..c242327d5217 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -99,6 +99,11 @@ __T; \ }) +static inline u64 ptr_to_u64(const void *ptr) +{ + return (uintptr_t)ptr; +} + #define u64_to_ptr(T, x) ({ \ typecheck(u64, x); \ (T *)(uintptr_t)(x); \ From 7c26240e8a19240924b029215989f28995f16d8c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Oct 2017 11:40:38 +0100 Subject: [PATCH 061/145] drm/i915: Try harder to finish the idle-worker If a worker requeues itself, it may switch to a different kworker pool, which flush_work() considers as complete. To be strict, we then need to keep flushing the work until it is no longer pending. References: https://bugs.freedesktop.org/show_bug.cgi?id=102456 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171006104038.22337-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 3 +-- drivers/gpu/drm/i915/i915_gem.c | 3 +-- drivers/gpu/drm/i915/i915_utils.h | 13 +++++++++++++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 44aae25d12c7..84ab77c02d3e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4259,8 +4259,7 @@ fault_irq_set(struct drm_i915_private *i915, mutex_unlock(&i915->drm.struct_mutex); /* Flush idle worker to disarm irq */ - while (flush_delayed_work(&i915->gt.idle_work)) - ; + drain_delayed_work(&i915->gt.idle_work); return 0; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ab8c6946fea4..c48700f7bc6e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4547,8 +4547,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) /* As the idle_work is rearming if it detects a race, play safe and * repeat the flush until it is definitely idle. */ - while (flush_delayed_work(&dev_priv->gt.idle_work)) - ; + drain_delayed_work(&dev_priv->gt.idle_work); /* Assert that we sucessfully flushed all the work and * reset the GPU back to its idle, low power state. diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index c242327d5217..af3d7cc53fa1 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -124,4 +124,17 @@ static inline void __list_del_many(struct list_head *head, WRITE_ONCE(head->next, first); } +/* + * Wait until the work is finally complete, even if it tries to postpone + * by requeueing itself. Note, that if the worker never cancels itself, + * we will spin forever. + */ +static inline void drain_delayed_work(struct delayed_work *dw) +{ + do { + while (flush_delayed_work(dw)) + ; + } while (delayed_work_pending(dw)); +} + #endif /* !__I915_UTILS_H */ From 320671f94ada80ff036cc9d5dcd730ba4f3e0f1a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 2 Oct 2017 11:04:16 +0100 Subject: [PATCH 062/145] drm/i915: Silence compiler warning for hsw_power_well_enable() Not all compilers are able to determine that pg is guarded by wait_fuses and so may think that pg is used uninitialized. Reported-by: Geert Uytterhoeven Fixes: b2891eb2531e ("drm/i915/hsw+: Add has_fuses power well attribute") Signed-off-by: Chris Wilson Cc: Imre Deak Cc: Arkadiusz Hiler Link: https://patchwork.freedesktop.org/patch/msgid/20171002100416.25865-1-chris@chris-wilson.co.uk Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_runtime_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 3791c3f5f56d..c4e1aba83c3e 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -368,7 +368,7 @@ static void hsw_power_well_enable(struct drm_i915_private *dev_priv, { enum i915_power_well_id id = power_well->id; bool wait_fuses = power_well->hsw.has_fuses; - enum skl_power_gate pg; + enum skl_power_gate uninitialized_var(pg); u32 val; if (wait_fuses) { From 8d550824c6f52506754f11cb6be51aa153cc580d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Oct 2017 12:56:17 +0100 Subject: [PATCH 063/145] drm/i915: Order two completing nop_submit_request If two nop's (requests in-flight following a wedged device) complete at the same time, the global_seqno value written to the HWSP is undefined as the two threads are not serialized. v2: Use irqsafe spinlock. We expect the callback may be called from inside another irq spinlock, so we can't unconditionally restore irqs. Fixes: ce1135c7de64 ("drm/i915: Complete requests in nop_submit_request") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20171006115617.18432-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c48700f7bc6e..50cc3c2cef06 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3014,10 +3014,15 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) static void nop_submit_request(struct drm_i915_gem_request *request) { + unsigned long flags; + GEM_BUG_ON(!i915_terminally_wedged(&request->i915->gpu_error)); dma_fence_set_error(&request->fence, -EIO); - i915_gem_request_submit(request); + + spin_lock_irqsave(&request->engine->timeline->lock, flags); + __i915_gem_request_submit(request); intel_engine_init_global_seqno(request->engine, request->global_seqno); + spin_unlock_irqrestore(&request->engine->timeline->lock, flags); } static void engine_set_wedged(struct intel_engine_cs *engine) From 279f5a00c9a9b39f4f6e9813e6d4da8c181d34c8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 5 Oct 2017 20:10:05 +0100 Subject: [PATCH 064/145] drm/i915/execlists: Add a comment for the extra MI_ARB_ENABLE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Michel Thierry noticed that we were applying WaDisableCtxRestoreArbitration even to gen9, which does not require the w/a. The rationale is that we need to enable MI arbitration for execlists to work, and to be safe we do that before every batch (in addition to every context switch into the batch). Since this is not clear from the single line comment suggesting the MI_ARB_ENABLE is solely for the w/a, add a little more detail. Signed-off-by: Chris Wilson Cc: Michel Thierry Cc: Joonas Lahtinen Cc: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20171005191005.13462-1-chris@chris-wilson.co.uk Reviewed-by: Michel Thierry --- drivers/gpu/drm/i915/intel_lrc.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c5b76082d695..721432ddf403 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1618,7 +1618,23 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, if (IS_ERR(cs)) return PTR_ERR(cs); - /* WaDisableCtxRestoreArbitration:bdw,chv */ + /* + * WaDisableCtxRestoreArbitration:bdw,chv + * + * We don't need to perform MI_ARB_ENABLE as often as we do (in + * particular all the gen that do not need the w/a at all!), if we + * took care to make sure that on every switch into this context + * (both ordinary and for preemption) that arbitrartion was enabled + * we would be fine. However, there doesn't seem to be a downside to + * being paranoid and making sure it is set before each batch and + * every context-switch. + * + * Note that if we fail to enable arbitration before the request + * is complete, then we do not see the context-switch interrupt and + * the engine hangs (with RING_HEAD == RING_TAIL). + * + * That satisfies both the GPGPU w/a and our heavy-handed paranoia. + */ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; /* FIXME(BDW): Address space and security selectors. */ From 703321b60b605b1f381f5dcf0bca42703b912e3e Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:13 +0100 Subject: [PATCH 065/145] mm/shmem: introduce shmem_file_setup_with_mnt We are planning to use our own tmpfs mnt in i915 in place of the shm_mnt, such that we can control the mount options, in particular huge=, which we require to support huge-gtt-pages. So rather than roll our own version of __shmem_file_setup, it would be preferred if we could just give shmem our mnt, and let it do the rest. Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Dave Hansen Cc: Kirill A. Shutemov Cc: Hugh Dickins Cc: linux-mm@kvack.org Acked-by: Andrew Morton Acked-by: Kirill A. Shutemov Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-2-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-1-chris@chris-wilson.co.uk --- include/linux/shmem_fs.h | 2 ++ mm/shmem.c | 30 ++++++++++++++++++++++-------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index b6c3540e07bc..0937d9a7d8fb 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -53,6 +53,8 @@ extern struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); extern struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags); +extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, + const char *name, loff_t size, unsigned long flags); extern int shmem_zero_setup(struct vm_area_struct *); extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); diff --git a/mm/shmem.c b/mm/shmem.c index 07a1d22807be..3229d27503ec 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -4183,7 +4183,7 @@ static const struct dentry_operations anon_ops = { .d_dname = simple_dname }; -static struct file *__shmem_file_setup(const char *name, loff_t size, +static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, loff_t size, unsigned long flags, unsigned int i_flags) { struct file *res; @@ -4192,8 +4192,8 @@ static struct file *__shmem_file_setup(const char *name, loff_t size, struct super_block *sb; struct qstr this; - if (IS_ERR(shm_mnt)) - return ERR_CAST(shm_mnt); + if (IS_ERR(mnt)) + return ERR_CAST(mnt); if (size < 0 || size > MAX_LFS_FILESIZE) return ERR_PTR(-EINVAL); @@ -4205,8 +4205,8 @@ static struct file *__shmem_file_setup(const char *name, loff_t size, this.name = name; this.len = strlen(name); this.hash = 0; /* will go */ - sb = shm_mnt->mnt_sb; - path.mnt = mntget(shm_mnt); + sb = mnt->mnt_sb; + path.mnt = mntget(mnt); path.dentry = d_alloc_pseudo(sb, &this); if (!path.dentry) goto put_memory; @@ -4251,7 +4251,7 @@ put_path: */ struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags) { - return __shmem_file_setup(name, size, flags, S_PRIVATE); + return __shmem_file_setup(shm_mnt, name, size, flags, S_PRIVATE); } /** @@ -4262,10 +4262,24 @@ struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned lon */ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags) { - return __shmem_file_setup(name, size, flags, 0); + return __shmem_file_setup(shm_mnt, name, size, flags, 0); } EXPORT_SYMBOL_GPL(shmem_file_setup); +/** + * shmem_file_setup_with_mnt - get an unlinked file living in tmpfs + * @mnt: the tmpfs mount where the file will be created + * @name: name for dentry (to be seen in /proc//maps + * @size: size to be set for the file + * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size + */ +struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, const char *name, + loff_t size, unsigned long flags) +{ + return __shmem_file_setup(mnt, name, size, flags, 0); +} +EXPORT_SYMBOL_GPL(shmem_file_setup_with_mnt); + /** * shmem_zero_setup - setup a shared anonymous mapping * @vma: the vma to be mmapped is prepared by do_mmap_pgoff @@ -4281,7 +4295,7 @@ int shmem_zero_setup(struct vm_area_struct *vma) * accessible to the user through its mapping, use S_PRIVATE flag to * bypass file security, in the same way as shmem_kernel_file_setup(). */ - file = __shmem_file_setup("dev/zero", size, vma->vm_flags, S_PRIVATE); + file = shmem_kernel_file_setup("dev/zero", size, vma->vm_flags); if (IS_ERR(file)) return PTR_ERR(file); From 465c403cb50868a86164bfe5b3bf6ddd70c395e1 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:14 +0100 Subject: [PATCH 066/145] drm/i915: introduce simple gemfs Not a fully blown gemfs, just our very own tmpfs kernel mount. Doing so moves us away from the shmemfs shm_mnt, and gives us the much needed flexibility to do things like set our own mount options, namely huge= which should allow us to enable the use of transparent-huge-pages for our shmem backed objects. v2: various improvements suggested by Joonas v3: move gemfs instance to i915.mm and simplify now that we have file_setup_with_mnt v4: fallback to tmpfs shm_mnt upon failure to setup gemfs v5: make tmpfs fallback kinder v5: better gemfs failure message flags variable Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Dave Hansen Cc: Kirill A. Shutemov Cc: Hugh Dickins Cc: linux-mm@kvack.org Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-3-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.h | 5 ++ drivers/gpu/drm/i915/i915_gem.c | 33 +++++++++++- drivers/gpu/drm/i915/i915_gemfs.c | 52 +++++++++++++++++++ drivers/gpu/drm/i915/i915_gemfs.h | 34 ++++++++++++ .../gpu/drm/i915/selftests/mock_gem_device.c | 4 ++ 6 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/i915/i915_gemfs.c create mode 100644 drivers/gpu/drm/i915/i915_gemfs.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 51d0d2929a4b..66d23b619db1 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -47,6 +47,7 @@ i915-y += i915_cmd_parser.o \ i915_gem_tiling.o \ i915_gem_timeline.o \ i915_gem_userptr.o \ + i915_gemfs.o \ i915_trace_points.o \ i915_vma.o \ intel_breadcrumbs.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1fc7080bfa7b..ec6f320cc4f5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1511,6 +1511,11 @@ struct i915_gem_mm { /** Usable portion of the GTT for GEM */ dma_addr_t stolen_base; /* limited to low memory (32-bit) */ + /** + * tmpfs instance used for shmem backed objects + */ + struct vfsmount *gemfs; + /** PPGTT used for aliasing the PPGTT with the GTT */ struct i915_hw_ppgtt *aliasing_ppgtt; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 50cc3c2cef06..1da1f52d12cc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -35,6 +35,7 @@ #include "intel_drv.h" #include "intel_frontbuffer.h" #include "intel_mocs.h" +#include "i915_gemfs.h" #include #include #include @@ -4256,6 +4257,30 @@ static const struct drm_i915_gem_object_ops i915_gem_object_ops = { .pwrite = i915_gem_object_pwrite_gtt, }; +static int i915_gem_object_create_shmem(struct drm_device *dev, + struct drm_gem_object *obj, + size_t size) +{ + struct drm_i915_private *i915 = to_i915(dev); + unsigned long flags = VM_NORESERVE; + struct file *filp; + + drm_gem_private_object_init(dev, obj, size); + + if (i915->mm.gemfs) + filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, + flags); + else + filp = shmem_file_setup("i915", size, flags); + + if (IS_ERR(filp)) + return PTR_ERR(filp); + + obj->filp = filp; + + return 0; +} + struct drm_i915_gem_object * i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) { @@ -4280,7 +4305,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) if (obj == NULL) return ERR_PTR(-ENOMEM); - ret = drm_gem_object_init(&dev_priv->drm, &obj->base, size); + ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size); if (ret) goto fail; @@ -4919,6 +4944,10 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) spin_lock_init(&dev_priv->fb_tracking.lock); + err = i915_gemfs_init(dev_priv); + if (err) + DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); + return 0; err_priorities: @@ -4957,6 +4986,8 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ rcu_barrier(); + + i915_gemfs_fini(dev_priv); } int i915_gem_freeze(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_gemfs.c b/drivers/gpu/drm/i915/i915_gemfs.c new file mode 100644 index 000000000000..168d0bd98f60 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gemfs.c @@ -0,0 +1,52 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include +#include + +#include "i915_drv.h" +#include "i915_gemfs.h" + +int i915_gemfs_init(struct drm_i915_private *i915) +{ + struct file_system_type *type; + struct vfsmount *gemfs; + + type = get_fs_type("tmpfs"); + if (!type) + return -ENODEV; + + gemfs = kern_mount(type); + if (IS_ERR(gemfs)) + return PTR_ERR(gemfs); + + i915->mm.gemfs = gemfs; + + return 0; +} + +void i915_gemfs_fini(struct drm_i915_private *i915) +{ + kern_unmount(i915->mm.gemfs); +} diff --git a/drivers/gpu/drm/i915/i915_gemfs.h b/drivers/gpu/drm/i915/i915_gemfs.h new file mode 100644 index 000000000000..cca8bdc5b93e --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gemfs.h @@ -0,0 +1,34 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_GEMFS_H__ +#define __I915_GEMFS_H__ + +struct drm_i915_private; + +int i915_gemfs_init(struct drm_i915_private *i915); + +void i915_gemfs_fini(struct drm_i915_private *i915); + +#endif diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 0c8225d677e9..e189d2b5f6c1 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -83,6 +83,8 @@ static void mock_device_release(struct drm_device *dev) kmem_cache_destroy(i915->vmas); kmem_cache_destroy(i915->objects); + i915_gemfs_fini(i915); + drm_dev_fini(&i915->drm); put_device(&i915->drm.pdev->dev); } @@ -243,6 +245,8 @@ struct drm_i915_private *mock_gem_device(void) if (!i915->preempt_context) goto err_kernel_context; + WARN_ON(i915_gemfs_init(i915)); + return i915; err_kernel_context: From b901bb89324ae65573c5f5c0a126cb864096b77c Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:15 +0100 Subject: [PATCH 067/145] drm/i915/gemfs: enable THP Enable transparent-huge-pages through gemfs by mounting with huge=within_size. v2: sprinkle within_size comment Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-4-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gemfs.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gemfs.c b/drivers/gpu/drm/i915/i915_gemfs.c index 168d0bd98f60..e2993857df37 100644 --- a/drivers/gpu/drm/i915/i915_gemfs.c +++ b/drivers/gpu/drm/i915/i915_gemfs.c @@ -24,6 +24,7 @@ #include #include +#include #include "i915_drv.h" #include "i915_gemfs.h" @@ -41,6 +42,27 @@ int i915_gemfs_init(struct drm_i915_private *i915) if (IS_ERR(gemfs)) return PTR_ERR(gemfs); + /* + * Enable huge-pages for objects that are at least HPAGE_PMD_SIZE, most + * likely 2M. Note that within_size may overallocate huge-pages, if say + * we allocate an object of size 2M + 4K, we may get 2M + 2M, but under + * memory pressure shmem should split any huge-pages which can be + * shrunk. + */ + + if (has_transparent_hugepage()) { + struct super_block *sb = gemfs->mnt_sb; + char options[] = "huge=within_size"; + int flags = 0; + int err; + + err = sb->s_op->remount_fs(sb, &flags, options); + if (err) { + kern_unmount(gemfs); + return err; + } + } + i915->mm.gemfs = gemfs; return 0; From 2a9654b2cdd8f9ef51b91dfd4448973a47284825 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:16 +0100 Subject: [PATCH 068/145] drm/i915: introduce page_sizes field to dev_info In preparation for huge gtt pages expose page_sizes as part of the device info, to indicate the page sizes supported by the HW. Currently only 4K is supported. v2: s/page_size_mask/page_sizes/ v3: introduce I915_GTT_MAX_PAGE_SIZE Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Chris Wilson Reviewed-by: Joonas Lahtinen Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-5-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem_gtt.h | 8 +++++++- drivers/gpu/drm/i915/i915_pci.c | 18 ++++++++++++++++++ .../gpu/drm/i915/selftests/mock_gem_device.c | 3 +++ 4 files changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ec6f320cc4f5..3d4dee817381 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -869,6 +869,8 @@ struct intel_device_info { u8 num_sprites[I915_MAX_PIPES]; u8 num_scalers[I915_MAX_PIPES]; + unsigned int page_sizes; /* page sizes supported by the HW */ + #define DEFINE_FLAG(name) u8 name:1 DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); #undef DEFINE_FLAG diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index f62fb903dc24..50218c141c21 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -42,7 +42,13 @@ #include "i915_gem_request.h" #include "i915_selftest.h" -#define I915_GTT_PAGE_SIZE 4096UL +#define I915_GTT_PAGE_SIZE_4K BIT(12) +#define I915_GTT_PAGE_SIZE_64K BIT(16) +#define I915_GTT_PAGE_SIZE_2M BIT(21) + +#define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K +#define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M + #define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE #define I915_FENCE_REG_NONE -1 diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 745b6a6e0188..7938006cf03a 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -58,6 +58,10 @@ .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } /* Keep in gen based order, and chronological order within a gen */ + +#define GEN_DEFAULT_PAGE_SIZES \ + .page_sizes = I915_GTT_PAGE_SIZE_4K + #define GEN2_FEATURES \ .gen = 2, .num_pipes = 1, \ .has_overlay = 1, .overlay_needs_physical = 1, \ @@ -67,6 +71,7 @@ .ring_mask = RENDER_RING, \ .has_snoop = true, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ CURSOR_OFFSETS static const struct intel_device_info intel_i830_info __initconst = { @@ -100,6 +105,7 @@ static const struct intel_device_info intel_i865g_info __initconst = { .ring_mask = RENDER_RING, \ .has_snoop = true, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ CURSOR_OFFSETS static const struct intel_device_info intel_i915g_info __initconst = { @@ -163,6 +169,7 @@ static const struct intel_device_info intel_pineview_info __initconst = { .ring_mask = RENDER_RING, \ .has_snoop = true, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ CURSOR_OFFSETS static const struct intel_device_info intel_i965g_info __initconst = { @@ -205,6 +212,7 @@ static const struct intel_device_info intel_gm45_info __initconst = { .ring_mask = RENDER_RING | BSD_RING, \ .has_snoop = true, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ CURSOR_OFFSETS static const struct intel_device_info intel_ironlake_d_info __initconst = { @@ -228,6 +236,7 @@ static const struct intel_device_info intel_ironlake_m_info __initconst = { .has_rc6p = 1, \ .has_aliasing_ppgtt = 1, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ CURSOR_OFFSETS #define SNB_D_PLATFORM \ @@ -271,6 +280,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info __initconst = .has_aliasing_ppgtt = 1, \ .has_full_ppgtt = 1, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ IVB_CURSOR_OFFSETS #define IVB_D_PLATFORM \ @@ -327,6 +337,7 @@ static const struct intel_device_info intel_valleyview_info __initconst = { .has_snoop = true, .ring_mask = RENDER_RING | BSD_RING | BLT_RING, .display_mmio_offset = VLV_DISPLAY_BASE, + GEN_DEFAULT_PAGE_SIZES, GEN_DEFAULT_PIPEOFFSETS, CURSOR_OFFSETS }; @@ -365,6 +376,7 @@ static const struct intel_device_info intel_haswell_gt3_info __initconst = { #define GEN8_FEATURES \ G75_FEATURES, \ BDW_COLORS, \ + GEN_DEFAULT_PAGE_SIZES, \ .has_logical_ring_contexts = 1, \ .has_full_48bit_ppgtt = 1, \ .has_64bit_reloc = 1, \ @@ -417,13 +429,18 @@ static const struct intel_device_info intel_cherryview_info __initconst = { .has_reset_engine = 1, .has_snoop = true, .display_mmio_offset = VLV_DISPLAY_BASE, + GEN_DEFAULT_PAGE_SIZES, GEN_CHV_PIPEOFFSETS, CURSOR_OFFSETS, CHV_COLORS, }; +#define GEN9_DEFAULT_PAGE_SIZES \ + .page_sizes = I915_GTT_PAGE_SIZE_4K + #define GEN9_FEATURES \ GEN8_FEATURES, \ + GEN9_DEFAULT_PAGE_SIZES, \ .has_logical_ring_preemption = 1, \ .has_csr = 1, \ .has_guc = 1, \ @@ -486,6 +503,7 @@ static const struct intel_device_info intel_skylake_gt4_info __initconst = { .has_reset_engine = 1, \ .has_snoop = true, \ .has_ipc = 1, \ + GEN9_DEFAULT_PAGE_SIZES, \ GEN_DEFAULT_PIPEOFFSETS, \ IVB_CURSOR_OFFSETS, \ BDW_COLORS diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index e189d2b5f6c1..f46c3a35d61a 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -174,6 +174,9 @@ struct drm_i915_private *mock_gem_device(void) mkwrite_device_info(i915)->gen = -1; + mkwrite_device_info(i915)->page_sizes = + I915_GTT_PAGE_SIZE_4K; + spin_lock_init(&i915->mm.object_stat_lock); mock_uncore_init(i915); From b91b09eea7a15ab417aa9ed6502b3be12f5283f8 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:17 +0100 Subject: [PATCH 069/145] drm/i915: push set_pages down to the callers Each backend is now responsible for calling __i915_gem_object_set_pages upon successfully gathering its backing storage. This eliminates the inconsistency between the async and sync paths, which stands out even more when we start throwing around an sg_mask in a later patch. Suggested-by: Chris Wilson Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Joonas Lahtinen Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-6-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 45 ++++++++++--------- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 15 +++++-- drivers/gpu/drm/i915/i915_gem_internal.c | 15 ++++--- drivers/gpu/drm/i915/i915_gem_object.h | 2 +- drivers/gpu/drm/i915/i915_gem_stolen.c | 16 ++++--- drivers/gpu/drm/i915/i915_gem_userptr.c | 12 ++--- .../gpu/drm/i915/selftests/huge_gem_object.c | 14 +++--- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 12 ++--- 8 files changed, 77 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1da1f52d12cc..42f2ca1e136b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -162,8 +162,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, return 0; } -static struct sg_table * -i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) { struct address_space *mapping = obj->base.filp->f_mapping; drm_dma_handle_t *phys; @@ -171,9 +170,10 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) struct scatterlist *sg; char *vaddr; int i; + int err; if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) - return ERR_PTR(-EINVAL); + return -EINVAL; /* Always aligning to the object size, allows a single allocation * to handle all possible callers, and given typical object sizes, @@ -183,7 +183,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) roundup_pow_of_two(obj->base.size), roundup_pow_of_two(obj->base.size)); if (!phys) - return ERR_PTR(-ENOMEM); + return -ENOMEM; vaddr = phys->vaddr; for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { @@ -192,7 +192,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) page = shmem_read_mapping_page(mapping, i); if (IS_ERR(page)) { - st = ERR_CAST(page); + err = PTR_ERR(page); goto err_phys; } @@ -209,13 +209,13 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) st = kmalloc(sizeof(*st), GFP_KERNEL); if (!st) { - st = ERR_PTR(-ENOMEM); + err = -ENOMEM; goto err_phys; } if (sg_alloc_table(st, 1, GFP_KERNEL)) { kfree(st); - st = ERR_PTR(-ENOMEM); + err = -ENOMEM; goto err_phys; } @@ -227,11 +227,15 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) sg_dma_len(sg) = obj->base.size; obj->phys_handle = phys; - return st; + + __i915_gem_object_set_pages(obj, st); + + return 0; err_phys: drm_pci_free(obj->base.dev, phys); - return st; + + return err; } static void __start_cpu_write(struct drm_i915_gem_object *obj) @@ -2292,8 +2296,7 @@ static bool i915_sg_trim(struct sg_table *orig_st) return true; } -static struct sg_table * -i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); const unsigned long page_count = obj->base.size / PAGE_SIZE; @@ -2317,12 +2320,12 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) st = kmalloc(sizeof(*st), GFP_KERNEL); if (st == NULL) - return ERR_PTR(-ENOMEM); + return -ENOMEM; rebuild_st: if (sg_alloc_table(st, page_count, GFP_KERNEL)) { kfree(st); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } /* Get the list of pages out of our struct file. They'll be pinned @@ -2430,7 +2433,9 @@ rebuild_st: if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_do_bit_17_swizzle(obj, st); - return st; + __i915_gem_object_set_pages(obj, st); + + return 0; err_sg: sg_mark_end(sg); @@ -2451,7 +2456,7 @@ err_pages: if (ret == -ENOSPC) ret = -ENOMEM; - return ERR_PTR(ret); + return ret; } void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, @@ -2474,19 +2479,17 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) { - struct sg_table *pages; + int err; if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { DRM_DEBUG("Attempting to obtain a purgeable object\n"); return -EFAULT; } - pages = obj->ops->get_pages(obj); - if (unlikely(IS_ERR(pages))) - return PTR_ERR(pages); + err = obj->ops->get_pages(obj); + GEM_BUG_ON(!err && IS_ERR_OR_NULL(obj->mm.pages)); - __i915_gem_object_set_pages(obj, pages); - return 0; + return err; } /* Ensure that the associated pages are gathered from the backing storage diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 6176e589cf09..4c4dc85159fb 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -256,11 +256,18 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev, return drm_gem_dmabuf_export(dev, &exp_info); } -static struct sg_table * -i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) { - return dma_buf_map_attachment(obj->base.import_attach, - DMA_BIDIRECTIONAL); + struct sg_table *pages; + + pages = dma_buf_map_attachment(obj->base.import_attach, + DMA_BIDIRECTIONAL); + if (IS_ERR(pages)) + return PTR_ERR(pages); + + __i915_gem_object_set_pages(obj, pages); + + return 0; } static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index c1f64ddaf8aa..f59764da4254 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -44,8 +44,7 @@ static void internal_free_pages(struct sg_table *st) kfree(st); } -static struct sg_table * -i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *st; @@ -78,12 +77,12 @@ i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) create_st: st = kmalloc(sizeof(*st), GFP_KERNEL); if (!st) - return ERR_PTR(-ENOMEM); + return -ENOMEM; npages = obj->base.size / PAGE_SIZE; if (sg_alloc_table(st, npages, GFP_KERNEL)) { kfree(st); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } sg = st->sgl; @@ -132,13 +131,17 @@ create_st: * object are only valid whilst active and pinned. */ obj->mm.madv = I915_MADV_DONTNEED; - return st; + + __i915_gem_object_set_pages(obj, st); + + return 0; err: sg_set_page(sg, NULL, 0, 0); sg_mark_end(sg); internal_free_pages(st); - return ERR_PTR(-ENOMEM); + + return -ENOMEM; } static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index c30d8f808185..036e847b27f0 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -69,7 +69,7 @@ struct drm_i915_gem_object_ops { * being released or under memory pressure (where we attempt to * reap pages for the shrinker). */ - struct sg_table *(*get_pages)(struct drm_i915_gem_object *); + int (*get_pages)(struct drm_i915_gem_object *); void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *); int (*pwrite)(struct drm_i915_gem_object *, diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 507c9f0d8df1..537ecb224db0 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -539,12 +539,18 @@ i915_pages_create_for_stolen(struct drm_device *dev, return st; } -static struct sg_table * -i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) { - return i915_pages_create_for_stolen(obj->base.dev, - obj->stolen->start, - obj->stolen->size); + struct sg_table *pages = + i915_pages_create_for_stolen(obj->base.dev, + obj->stolen->start, + obj->stolen->size); + if (IS_ERR(pages)) + return PTR_ERR(pages); + + __i915_gem_object_set_pages(obj, pages); + + return 0; } static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 2d4996de7331..70ad7489827d 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -434,6 +434,8 @@ alloc_table: return ERR_PTR(ret); } + __i915_gem_object_set_pages(obj, st); + return st; } @@ -521,7 +523,6 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) pages = __i915_gem_userptr_alloc_pages(obj, pvec, npages); if (!IS_ERR(pages)) { - __i915_gem_object_set_pages(obj, pages); pinned = 0; pages = NULL; } @@ -582,8 +583,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj) return ERR_PTR(-EAGAIN); } -static struct sg_table * -i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) +static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) { const int num_pages = obj->base.size >> PAGE_SHIFT; struct mm_struct *mm = obj->userptr.mm->mm; @@ -612,9 +612,9 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) if (obj->userptr.work) { /* active flag should still be held for the pending work */ if (IS_ERR(obj->userptr.work)) - return ERR_CAST(obj->userptr.work); + return PTR_ERR(obj->userptr.work); else - return ERR_PTR(-EAGAIN); + return -EAGAIN; } pvec = NULL; @@ -650,7 +650,7 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) release_pages(pvec, pinned, 0); kvfree(pvec); - return pages; + return PTR_ERR_OR_ZERO(pages); } static void diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c index c5c7e8efbdd3..41c15f3aa467 100644 --- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c @@ -37,8 +37,7 @@ static void huge_free_pages(struct drm_i915_gem_object *obj, kfree(pages); } -static struct sg_table * -huge_get_pages(struct drm_i915_gem_object *obj) +static int huge_get_pages(struct drm_i915_gem_object *obj) { #define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) const unsigned long nreal = obj->scratch / PAGE_SIZE; @@ -49,11 +48,11 @@ huge_get_pages(struct drm_i915_gem_object *obj) pages = kmalloc(sizeof(*pages), GFP); if (!pages) - return ERR_PTR(-ENOMEM); + return -ENOMEM; if (sg_alloc_table(pages, npages, GFP)) { kfree(pages); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } sg = pages->sgl; @@ -81,11 +80,14 @@ huge_get_pages(struct drm_i915_gem_object *obj) if (i915_gem_gtt_prepare_pages(obj, pages)) goto err; - return pages; + __i915_gem_object_set_pages(obj, pages); + + return 0; err: huge_free_pages(obj, pages); - return ERR_PTR(-ENOMEM); + + return -ENOMEM; #undef GFP } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 6b132caffa18..aa1db375d59a 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -39,8 +39,7 @@ static void fake_free_pages(struct drm_i915_gem_object *obj, kfree(pages); } -static struct sg_table * -fake_get_pages(struct drm_i915_gem_object *obj) +static int fake_get_pages(struct drm_i915_gem_object *obj) { #define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) #define PFN_BIAS 0x1000 @@ -50,12 +49,12 @@ fake_get_pages(struct drm_i915_gem_object *obj) pages = kmalloc(sizeof(*pages), GFP); if (!pages) - return ERR_PTR(-ENOMEM); + return -ENOMEM; rem = round_up(obj->base.size, BIT(31)) >> 31; if (sg_alloc_table(pages, rem, GFP)) { kfree(pages); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } rem = obj->base.size; @@ -72,7 +71,10 @@ fake_get_pages(struct drm_i915_gem_object *obj) GEM_BUG_ON(rem); obj->mm.madv = I915_MADV_DONTNEED; - return pages; + + __i915_gem_object_set_pages(obj, pages); + + return 0; #undef GFP } From a5c08166265adc172a4cbde8ed26a1a96ce77fb7 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:18 +0100 Subject: [PATCH 070/145] drm/i915: introduce page_size members In preparation for supporting huge gtt pages for the ppgtt, we introduce page size members for gem objects. We fill in the page sizes by scanning the sg table. v2: pass the sg_mask to set_pages v3: calculate the sg_mask inline with populating the sg_table where possible, and pass to set_pages along with the pages. v4: bunch of improvements from Joonas v5: fix num_pages blunder introduce i915_sg_page_sizes helper v6: prefer GEM_BUG_ON(sizes == 0) Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Daniel Vetter Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-7-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 22 +++++++++- drivers/gpu/drm/i915/i915_gem.c | 42 ++++++++++++++++--- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 5 ++- drivers/gpu/drm/i915/i915_gem_internal.c | 5 ++- drivers/gpu/drm/i915/i915_gem_object.h | 17 ++++++++ drivers/gpu/drm/i915/i915_gem_stolen.c | 2 +- drivers/gpu/drm/i915/i915_gem_userptr.c | 5 ++- .../gpu/drm/i915/selftests/huge_gem_object.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 5 ++- 9 files changed, 93 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3d4dee817381..799a90abd81f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2872,6 +2872,21 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg) (((__iter).curr += PAGE_SIZE) >= (__iter).max) ? \ (__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0 : 0) +static inline unsigned int i915_sg_page_sizes(struct scatterlist *sg) +{ + unsigned int page_sizes; + + page_sizes = 0; + while (sg) { + GEM_BUG_ON(sg->offset); + GEM_BUG_ON(!IS_ALIGNED(sg->length, PAGE_SIZE)); + page_sizes |= sg->length; + sg = __sg_next(sg); + } + + return page_sizes; +} + static inline unsigned int i915_sg_segment_size(void) { unsigned int size = swiotlb_max_segment(); @@ -3101,6 +3116,10 @@ intel_info(const struct drm_i915_private *dev_priv) #define USES_PPGTT(dev_priv) (i915_modparams.enable_ppgtt) #define USES_FULL_PPGTT(dev_priv) (i915_modparams.enable_ppgtt >= 2) #define USES_FULL_48BIT_PPGTT(dev_priv) (i915_modparams.enable_ppgtt == 3) +#define HAS_PAGE_SIZES(dev_priv, sizes) ({ \ + GEM_BUG_ON((sizes) == 0); \ + ((sizes) & ~(dev_priv)->info.page_sizes) == 0; \ +}) #define HAS_OVERLAY(dev_priv) ((dev_priv)->info.has_overlay) #define OVERLAY_NEEDS_PHYSICAL(dev_priv) \ @@ -3517,7 +3536,8 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, unsigned long n); void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages); + struct sg_table *pages, + unsigned int sg_mask); int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj); static inline int __must_check diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 42f2ca1e136b..34398696824c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -228,7 +228,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) obj->phys_handle = phys; - __i915_gem_object_set_pages(obj, st); + __i915_gem_object_set_pages(obj, st, sg->length); return 0; @@ -2266,6 +2266,8 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, if (!IS_ERR(pages)) obj->ops->put_pages(obj, pages); + obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; + unlock: mutex_unlock(&obj->mm.lock); } @@ -2308,6 +2310,7 @@ static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) struct page *page; unsigned long last_pfn = 0; /* suppress gcc warning */ unsigned int max_segment = i915_sg_segment_size(); + unsigned int sg_mask; gfp_t noreclaim; int ret; @@ -2339,6 +2342,7 @@ rebuild_st: sg = st->sgl; st->nents = 0; + sg_mask = 0; for (i = 0; i < page_count; i++) { const unsigned int shrink[] = { I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, @@ -2391,8 +2395,10 @@ rebuild_st: if (!i || sg->length >= max_segment || page_to_pfn(page) != last_pfn + 1) { - if (i) + if (i) { + sg_mask |= sg->length; sg = sg_next(sg); + } st->nents++; sg_set_page(sg, page, PAGE_SIZE, 0); } else { @@ -2403,8 +2409,10 @@ rebuild_st: /* Check that the i965g/gm workaround works. */ WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); } - if (sg) /* loop terminated early; short sg table */ + if (sg) { /* loop terminated early; short sg table */ + sg_mask |= sg->length; sg_mark_end(sg); + } /* Trim unused sg entries to avoid wasting memory. */ i915_sg_trim(st); @@ -2433,7 +2441,7 @@ rebuild_st: if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_do_bit_17_swizzle(obj, st); - __i915_gem_object_set_pages(obj, st); + __i915_gem_object_set_pages(obj, st, sg_mask); return 0; @@ -2460,8 +2468,13 @@ err_pages: } void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages) + struct sg_table *pages, + unsigned int sg_mask) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned long supported = INTEL_INFO(i915)->page_sizes; + int i; + lockdep_assert_held(&obj->mm.lock); obj->mm.get_page.sg_pos = pages->sgl; @@ -2475,6 +2488,25 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, __i915_gem_object_pin_pages(obj); obj->mm.quirked = true; } + + GEM_BUG_ON(!sg_mask); + obj->mm.page_sizes.phys = sg_mask; + + /* + * Calculate the supported page-sizes which fit into the given sg_mask. + * This will give us the page-sizes which we may be able to use + * opportunistically when later inserting into the GTT. For example if + * phys=2G, then in theory we should be able to use 1G, 2M, 64K or 4K + * pages, although in practice this will depend on a number of other + * factors. + */ + obj->mm.page_sizes.sg = 0; + for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { + if (obj->mm.page_sizes.phys & ~0u << i) + obj->mm.page_sizes.sg |= BIT(i); + } + + GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); } static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 4c4dc85159fb..e542a9d80077 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -259,13 +259,16 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev, static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) { struct sg_table *pages; + unsigned int sg_mask; pages = dma_buf_map_attachment(obj->base.import_attach, DMA_BIDIRECTIONAL); if (IS_ERR(pages)) return PTR_ERR(pages); - __i915_gem_object_set_pages(obj, pages); + sg_mask = i915_sg_page_sizes(pages->sgl); + + __i915_gem_object_set_pages(obj, pages, sg_mask); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index f59764da4254..bdc23c4c8783 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -49,6 +49,7 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *st; struct scatterlist *sg; + unsigned int sg_mask; unsigned int npages; int max_order; gfp_t gfp; @@ -87,6 +88,7 @@ create_st: sg = st->sgl; st->nents = 0; + sg_mask = 0; do { int order = min(fls(npages) - 1, max_order); @@ -104,6 +106,7 @@ create_st: } while (1); sg_set_page(sg, page, PAGE_SIZE << order, 0); + sg_mask |= PAGE_SIZE << order; st->nents++; npages -= 1 << order; @@ -132,7 +135,7 @@ create_st: */ obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, st); + __i915_gem_object_set_pages(obj, st, sg_mask); return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 036e847b27f0..110672952a1c 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -169,6 +169,23 @@ struct drm_i915_gem_object { struct sg_table *pages; void *mapping; + struct i915_page_sizes { + /** + * The sg mask of the pages sg_table. i.e the mask of + * of the lengths for each sg entry. + */ + unsigned int phys; + + /** + * The gtt page sizes we are allowed to use given the + * sg mask and the supported page sizes. This will + * express the smallest unit we can use for the whole + * object, as well as the larger sizes we may be able + * to use opportunistically. + */ + unsigned int sg; + } page_sizes; + struct i915_gem_object_page_iter { struct scatterlist *sg_pos; unsigned int sg_idx; /* in pages, but 32bit eek! */ diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 537ecb224db0..54fd4cfa9d07 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -548,7 +548,7 @@ static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) if (IS_ERR(pages)) return PTR_ERR(pages); - __i915_gem_object_set_pages(obj, pages); + __i915_gem_object_set_pages(obj, pages, obj->stolen->size); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 70ad7489827d..41e16e19c3f3 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -405,6 +405,7 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, { unsigned int max_segment = i915_sg_segment_size(); struct sg_table *st; + unsigned int sg_mask; int ret; st = kmalloc(sizeof(*st), GFP_KERNEL); @@ -434,7 +435,9 @@ alloc_table: return ERR_PTR(ret); } - __i915_gem_object_set_pages(obj, st); + sg_mask = i915_sg_page_sizes(st->sgl); + + __i915_gem_object_set_pages(obj, st, sg_mask); return st; } diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c index 41c15f3aa467..a2632df39173 100644 --- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c @@ -80,7 +80,7 @@ static int huge_get_pages(struct drm_i915_gem_object *obj) if (i915_gem_gtt_prepare_pages(obj, pages)) goto err; - __i915_gem_object_set_pages(obj, pages); + __i915_gem_object_set_pages(obj, pages, PAGE_SIZE); return 0; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index aa1db375d59a..883bc19e3aaf 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -45,6 +45,7 @@ static int fake_get_pages(struct drm_i915_gem_object *obj) #define PFN_BIAS 0x1000 struct sg_table *pages; struct scatterlist *sg; + unsigned int sg_mask; typeof(obj->base.size) rem; pages = kmalloc(sizeof(*pages), GFP); @@ -57,6 +58,7 @@ static int fake_get_pages(struct drm_i915_gem_object *obj) return -ENOMEM; } + sg_mask = 0; rem = obj->base.size; for (sg = pages->sgl; sg; sg = sg_next(sg)) { unsigned long len = min_t(typeof(rem), rem, BIT(31)); @@ -65,6 +67,7 @@ static int fake_get_pages(struct drm_i915_gem_object *obj) sg_set_page(sg, pfn_to_page(PFN_BIAS), len, 0); sg_dma_address(sg) = page_to_phys(sg_page(sg)); sg_dma_len(sg) = len; + sg_mask |= len; rem -= len; } @@ -72,7 +75,7 @@ static int fake_get_pages(struct drm_i915_gem_object *obj) obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, pages); + __i915_gem_object_set_pages(obj, pages, sg_mask); return 0; #undef GFP From fa3f46afd38cece52f6ff70603b15c1aeb6ec225 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:19 +0100 Subject: [PATCH 071/145] drm/i915: introduce vm set_pages/clear_pages Move the setting/clearing of the vma->pages to a vm operation. Doing so neatens things up a little, but more importantly gives us a sane place to also set/clear the vma->pages_sizes, which we introduce later in preparation for supporting huge-pages. v2: remove redundant vma->pages check v3: GEM_BUG_ON(vma->pages) following i915_vma_remove Suggested-by: Chris Wilson Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-8-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 70 ++++++++++++++--------- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 + drivers/gpu/drm/i915/i915_vma.c | 27 +++++---- drivers/gpu/drm/i915/selftests/mock_gtt.c | 11 ++-- 4 files changed, 66 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 4c82ceb8d318..c534b74eee32 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -205,8 +205,6 @@ static int ppgtt_bind_vma(struct i915_vma *vma, return ret; } - vma->pages = vma->obj->mm.pages; - /* Currently applicable only to VLV */ pte_flags = 0; if (vma->obj->gt_ro) @@ -222,6 +220,26 @@ static void ppgtt_unbind_vma(struct i915_vma *vma) vma->vm->clear_range(vma->vm, vma->node.start, vma->size); } +static int ppgtt_set_pages(struct i915_vma *vma) +{ + GEM_BUG_ON(vma->pages); + + vma->pages = vma->obj->mm.pages; + + return 0; +} + +static void clear_pages(struct i915_vma *vma) +{ + GEM_BUG_ON(!vma->pages); + + if (vma->pages != vma->obj->mm.pages) { + sg_free_table(vma->pages); + kfree(vma->pages); + } + vma->pages = NULL; +} + static gen8_pte_t gen8_pte_encode(dma_addr_t addr, enum i915_cache_level level) { @@ -1452,6 +1470,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.cleanup = gen8_ppgtt_cleanup; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; + ppgtt->base.set_pages = ppgtt_set_pages; + ppgtt->base.clear_pages = clear_pages; ppgtt->debug_dump = gen8_dump_ppgtt; return 0; @@ -1894,6 +1914,8 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; + ppgtt->base.set_pages = ppgtt_set_pages; + ppgtt->base.clear_pages = clear_pages; ppgtt->base.cleanup = gen6_ppgtt_cleanup; ppgtt->debug_dump = gen6_dump_ppgtt; @@ -2405,12 +2427,6 @@ static int ggtt_bind_vma(struct i915_vma *vma, struct drm_i915_gem_object *obj = vma->obj; u32 pte_flags; - if (unlikely(!vma->pages)) { - int ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; - } - /* Currently applicable only to VLV */ pte_flags = 0; if (obj->gt_ro) @@ -2447,12 +2463,6 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, u32 pte_flags; int ret; - if (unlikely(!vma->pages)) { - ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; - } - /* Currently applicable only to VLV */ pte_flags = 0; if (vma->obj->gt_ro) @@ -2467,7 +2477,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, vma->node.start, vma->size); if (ret) - goto err_pages; + return ret; } appgtt->base.insert_entries(&appgtt->base, vma, cache_level, @@ -2481,17 +2491,6 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, } return 0; - -err_pages: - if (!(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND))) { - if (vma->pages != vma->obj->mm.pages) { - GEM_BUG_ON(!vma->pages); - sg_free_table(vma->pages); - kfree(vma->pages); - } - vma->pages = NULL; - } - return ret; } static void aliasing_gtt_unbind_vma(struct i915_vma *vma) @@ -2529,6 +2528,19 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL); } +static int ggtt_set_pages(struct i915_vma *vma) +{ + int ret; + + GEM_BUG_ON(vma->pages); + + ret = i915_get_ggtt_vma_pages(vma); + if (ret) + return ret; + + return 0; +} + static void i915_gtt_color_adjust(const struct drm_mm_node *node, unsigned long color, u64 *start, @@ -3151,6 +3163,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.cleanup = gen6_gmch_remove; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; + ggtt->base.set_pages = ggtt_set_pages; + ggtt->base.clear_pages = clear_pages; ggtt->base.insert_page = gen8_ggtt_insert_page; ggtt->base.clear_range = nop_clear_range; if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) @@ -3209,6 +3223,8 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.insert_entries = gen6_ggtt_insert_entries; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; + ggtt->base.set_pages = ggtt_set_pages; + ggtt->base.clear_pages = clear_pages; ggtt->base.cleanup = gen6_gmch_remove; ggtt->invalidate = gen6_ggtt_invalidate; @@ -3254,6 +3270,8 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.clear_range = i915_ggtt_clear_range; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; + ggtt->base.set_pages = ggtt_set_pages; + ggtt->base.clear_pages = clear_pages; ggtt->base.cleanup = i915_gmch_remove; ggtt->invalidate = gmch_ggtt_invalidate; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 50218c141c21..f22491b4e6dc 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -335,6 +335,8 @@ struct i915_address_space { int (*bind_vma)(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); + int (*set_pages)(struct i915_vma *vma); + void (*clear_pages)(struct i915_vma *vma); I915_SELFTEST_DECLARE(struct fault_attr fault_attr); }; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 02d1a5eacb00..49bf49571e47 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -266,6 +266,8 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, if (bind_flags == 0) return 0; + GEM_BUG_ON(!vma->pages); + trace_i915_vma_bind(vma, bind_flags); ret = vma->vm->bind_vma(vma, cache_level, bind_flags); if (ret) @@ -471,25 +473,31 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) if (ret) return ret; + GEM_BUG_ON(vma->pages); + + ret = vma->vm->set_pages(vma); + if (ret) + goto err_unpin; + if (flags & PIN_OFFSET_FIXED) { u64 offset = flags & PIN_OFFSET_MASK; if (!IS_ALIGNED(offset, alignment) || range_overflows(offset, size, end)) { ret = -EINVAL; - goto err_unpin; + goto err_clear; } ret = i915_gem_gtt_reserve(vma->vm, &vma->node, size, offset, obj->cache_level, flags); if (ret) - goto err_unpin; + goto err_clear; } else { ret = i915_gem_gtt_insert(vma->vm, &vma->node, size, alignment, obj->cache_level, start, end, flags); if (ret) - goto err_unpin; + goto err_clear; GEM_BUG_ON(vma->node.start < start); GEM_BUG_ON(vma->node.start + vma->node.size > end); @@ -504,6 +512,8 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) return 0; +err_clear: + vma->vm->clear_pages(vma); err_unpin: i915_gem_object_unpin_pages(obj); return ret; @@ -517,6 +527,8 @@ i915_vma_remove(struct i915_vma *vma) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); + vma->vm->clear_pages(vma); + drm_mm_remove_node(&vma->node); list_move_tail(&vma->vm_link, &vma->vm->unbound_list); @@ -569,8 +581,8 @@ int __i915_vma_do_pin(struct i915_vma *vma, err_remove: if ((bound & I915_VMA_BIND_MASK) == 0) { - GEM_BUG_ON(vma->pages); i915_vma_remove(vma); + GEM_BUG_ON(vma->pages); } err_unpin: __i915_vma_unpin(vma); @@ -695,13 +707,6 @@ int i915_vma_unbind(struct i915_vma *vma) } vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); - if (vma->pages != obj->mm.pages) { - GEM_BUG_ON(!vma->pages); - sg_free_table(vma->pages); - kfree(vma->pages); - } - vma->pages = NULL; - i915_vma_remove(vma); destroy: diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index f2118cf535a0..336e1afb250f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -43,7 +43,6 @@ static int mock_bind_ppgtt(struct i915_vma *vma, u32 flags) { GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND); - vma->pages = vma->obj->mm.pages; vma->flags |= I915_VMA_LOCAL_BIND; return 0; } @@ -84,6 +83,8 @@ mock_ppgtt(struct drm_i915_private *i915, ppgtt->base.insert_entries = mock_insert_entries; ppgtt->base.bind_vma = mock_bind_ppgtt; ppgtt->base.unbind_vma = mock_unbind_ppgtt; + ppgtt->base.set_pages = ppgtt_set_pages; + ppgtt->base.clear_pages = clear_pages; ppgtt->base.cleanup = mock_cleanup; return ppgtt; @@ -93,12 +94,6 @@ static int mock_bind_ggtt(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) { - int err; - - err = i915_get_ggtt_vma_pages(vma); - if (err) - return err; - vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; return 0; } @@ -124,6 +119,8 @@ void mock_init_ggtt(struct drm_i915_private *i915) ggtt->base.insert_entries = mock_insert_entries; ggtt->base.bind_vma = mock_bind_ggtt; ggtt->base.unbind_vma = mock_unbind_ggtt; + ggtt->base.set_pages = ggtt_set_pages; + ggtt->base.clear_pages = clear_pages; ggtt->base.cleanup = mock_cleanup; i915_address_space_init(&ggtt->base, i915, "global"); From 7464284b357776b19e4cfb50d0d3724bdff035df Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:20 +0100 Subject: [PATCH 072/145] drm/i915: align the vma start to the largest gtt page size For the 48b PPGTT try to align the vma start address to the required page size boundary to guarantee we use said page size in the gtt. If we are dealing with multiple page sizes, we can't guarantee anything and just align to the largest. For soft pinning and objects which need to be tightly packed into the lower 32bits we don't force any alignment. v2: various improvements suggested by Chris v3: use set_pages and better placement of page_sizes v4: prefer upper_32_bits() v5: assign vma->page_sizes = vma->obj->page_sizes directly prefer sizeof(vma->page_sizes) v6: fixup checking of end to exclude GGTT (which are assumed to be limited to 4G). Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-9-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 6 ++++++ drivers/gpu/drm/i915/i915_vma.c | 16 ++++++++++++++++ drivers/gpu/drm/i915/i915_vma.h | 1 + 3 files changed, 23 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index c534b74eee32..fb7ac66814ab 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -226,6 +226,8 @@ static int ppgtt_set_pages(struct i915_vma *vma) vma->pages = vma->obj->mm.pages; + vma->page_sizes = vma->obj->mm.page_sizes; + return 0; } @@ -238,6 +240,8 @@ static void clear_pages(struct i915_vma *vma) kfree(vma->pages); } vma->pages = NULL; + + memset(&vma->page_sizes, 0, sizeof(vma->page_sizes)); } static gen8_pte_t gen8_pte_encode(dma_addr_t addr, @@ -2538,6 +2542,8 @@ static int ggtt_set_pages(struct i915_vma *vma) if (ret) return ret; + vma->page_sizes = vma->obj->mm.page_sizes; + return 0; } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 49bf49571e47..5d4164406b63 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -493,6 +493,22 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) if (ret) goto err_clear; } else { + /* + * We only support huge gtt pages through the 48b PPGTT, + * however we also don't want to force any alignment for + * objects which need to be tightly packed into the low 32bits. + * + * Note that we assume that GGTT are limited to 4GiB for the + * forseeable future. See also i915_ggtt_offset(). + */ + if (upper_32_bits(end - 1) && + vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { + u64 page_alignment = + rounddown_pow_of_two(vma->page_sizes.sg); + + alignment = max(alignment, page_alignment); + } + ret = i915_gem_gtt_insert(vma->vm, &vma->node, size, alignment, obj->cache_level, start, end, flags); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index e811067c7724..c59ba76613a3 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -55,6 +55,7 @@ struct i915_vma { void __iomem *iomap; u64 size; u64 display_alignment; + struct i915_page_sizes page_sizes; u32 fence_size; u32 fence_alignment; From 855822be74a957073143268bf976739de016937f Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:21 +0100 Subject: [PATCH 073/145] drm/i915: align 64K objects to 2M We can't mix 64K and 4K pte's in the same page-table, so for now we align 64K objects to 2M to avoid any potential mixing. This is potentially wasteful but in reality shouldn't be too bad since this only applies to the virtual address space of a 48b PPGTT. v2: don't separate logically connected ops Suggested-by: Chris Wilson Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-10-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-9-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_vma.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 5d4164406b63..72e86b32ab41 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -503,10 +503,20 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) */ if (upper_32_bits(end - 1) && vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { + /* + * We can't mix 64K and 4K PTEs in the same page-table + * (2M block), and so to avoid the ugliness and + * complexity of coloring we opt for just aligning 64K + * objects to 2M. + */ u64 page_alignment = - rounddown_pow_of_two(vma->page_sizes.sg); + rounddown_pow_of_two(vma->page_sizes.sg | + I915_GTT_PAGE_SIZE_2M); alignment = max(alignment, page_alignment); + + if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) + size = round_up(size, I915_GTT_PAGE_SIZE_2M); } ret = i915_gem_gtt_insert(vma->vm, &vma->node, From 9a6330cff9b4b0691c7554fe873b09e7f6d377a9 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:22 +0100 Subject: [PATCH 074/145] drm/i915: enable IPS bit for 64K pages Before we can enable 64K pages through the IPS bit, we must first enable it through MMIO, otherwise the page-walker will simply ignore it. v2: add comment mentioning that 64K is BDW+ v3: move to more suitable home Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-11-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-10-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 17 +++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 3 +++ 2 files changed, 20 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index fb7ac66814ab..74fc9ac11cd5 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1987,6 +1987,23 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); else if (IS_GEN9_LP(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); + + /* + * To support 64K PTEs we need to first enable the use of the + * Intermediate-Page-Size(IPS) bit of the PDE field via some magical + * mmio, otherwise the page-walker will simply ignore the IPS bit. This + * shouldn't be needed after GEN10. + * + * 64K pages were first introduced from BDW+, although technically they + * only *work* from gen9+. For pre-BDW we instead have the option for + * 32K pages, but we don't currently have any support for it in our + * driver. + */ + if (HAS_PAGE_SIZES(dev_priv, I915_GTT_PAGE_SIZE_64K) && + INTEL_GEN(dev_priv) <= 10) + I915_WRITE(GEN8_GAMW_ECO_DEV_RW_IA, + I915_READ(GEN8_GAMW_ECO_DEV_RW_IA) | + GAMW_ECO_ENABLE_64K_IPS_FIELD); } int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e7dba5539b11..50e65c98ca6c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2371,6 +2371,9 @@ enum i915_power_well_id { #define GEN9_GAMT_ECO_REG_RW_IA _MMIO(0x4ab0) #define GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS (1<<18) +#define GEN8_GAMW_ECO_DEV_RW_IA _MMIO(0x4080) +#define GAMW_ECO_ENABLE_64K_IPS_FIELD 0xF + #define GAMT_CHKN_BIT_REG _MMIO(0x4ab8) #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1<<28) #define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1<<24) From 8cb0983678e05939457d867e09bddb6883db5268 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:23 +0100 Subject: [PATCH 075/145] drm/i915: disable GTT cache for 2M pages When SW enables the use of 2M/1G pages, it must disable the GTT cache. v2: don't disable for Cherryview which doesn't even support 48b PPGTT! v3: explicitly check that the system does support 2M/1G pages v4: split WA and decision logic Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-12-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-11-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_pm.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 171b21f6c4ad..9d0ca2656a23 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8453,6 +8453,9 @@ static void skl_init_clock_gating(struct drm_i915_private *dev_priv) static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) { + /* The GTT cache must be disabled if the system is using 2M pages. */ + bool can_use_gtt_cache = !HAS_PAGE_SIZES(dev_priv, + I915_GTT_PAGE_SIZE_2M); enum pipe pipe; ilk_init_lp_watermarks(dev_priv); @@ -8487,12 +8490,8 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) /* WaProgramL3SqcReg1Default:bdw */ gen8_set_l3sqc_credits(dev_priv, 30, 2); - /* - * WaGttCachingOffByDefault:bdw - * GTT cache may not work with big pages, so if those - * are ever enabled GTT cache may need to be disabled. - */ - I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL); + /* WaGttCachingOffByDefault:bdw */ + I915_WRITE(HSW_GTT_CACHE_EN, can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0); /* WaKVMNotificationOnConfigChange:bdw */ I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1) From 0a03852e049af91da9ae70326c44bb5d9b0d377a Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:24 +0100 Subject: [PATCH 076/145] drm/i915: support 2M pages for the 48b PPGTT Support inserting 2M gtt pages into the 48b PPGTT. v2: sanity check sg->length against page_size v3: don't recalculate rem on each loop whitespace breakup Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-13-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-12-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 76 +++++++++++++++++++++++++++-- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 + 2 files changed, 74 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 74fc9ac11cd5..79ba485c5d42 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1013,6 +1013,69 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, cache_level); } +static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, + struct i915_page_directory_pointer **pdps, + struct sgt_dma *iter, + enum i915_cache_level cache_level) +{ + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level); + u64 start = vma->node.start; + dma_addr_t rem = iter->sg->length; + + do { + struct gen8_insert_pte idx = gen8_insert_pte(start); + struct i915_page_directory_pointer *pdp = pdps[idx.pml4e]; + struct i915_page_directory *pd = pdp->page_directory[idx.pdpe]; + unsigned int page_size; + gen8_pte_t encode = pte_encode; + gen8_pte_t *vaddr; + u16 index, max; + + if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M && + IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && + rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) { + index = idx.pde; + max = I915_PDES; + page_size = I915_GTT_PAGE_SIZE_2M; + + encode |= GEN8_PDE_PS_2M; + + vaddr = kmap_atomic_px(pd); + } else { + struct i915_page_table *pt = pd->page_table[idx.pde]; + + index = idx.pte; + max = GEN8_PTES; + page_size = I915_GTT_PAGE_SIZE; + + vaddr = kmap_atomic_px(pt); + } + + do { + GEM_BUG_ON(iter->sg->length < page_size); + vaddr[index++] = encode | iter->dma; + + start += page_size; + iter->dma += page_size; + rem -= page_size; + if (iter->dma >= iter->max) { + iter->sg = __sg_next(iter->sg); + if (!iter->sg) + break; + + rem = iter->sg->length; + iter->dma = sg_dma_address(iter->sg); + iter->max = iter->dma + rem; + + if (unlikely(!IS_ALIGNED(iter->dma, page_size))) + break; + } + } while (rem >= page_size && index < max); + + kunmap_atomic(vaddr); + } while (iter->sg); +} + static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, struct i915_vma *vma, enum i915_cache_level cache_level, @@ -1025,11 +1088,16 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, .max = iter.dma + iter.sg->length, }; struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps; - struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); - while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], &iter, - &idx, cache_level)) - GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); + if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { + gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level); + } else { + struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); + + while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], + &iter, &idx, cache_level)) + GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); + } } static void gen8_free_page_tables(struct i915_address_space *vm, diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index f22491b4e6dc..b9d7036c3665 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -154,6 +154,8 @@ typedef u64 gen8_ppgtt_pml4e_t; #define GEN8_PPAT_GET_AGE(x) ((x) & (3 << 4)) #define CHV_PPAT_GET_SNOOP(x) ((x) & (1 << 6)) +#define GEN8_PDE_PS_2M BIT(7) + struct sg_table; struct intel_rotation_info { From aa095871e4a7ba9fcf87ffd762c044e1f9f634f1 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:25 +0100 Subject: [PATCH 077/145] drm/i915: add support for 64K scratch page Before we can fully enable 64K pages, we need to first support a 64K scratch page if we intend to support the case where we have object sizes < 2M, since any scratch PTE must also point to a 64K region. Without this our 64K usage is limited to objects which completely fill the page-table, and therefore don't need any scratch. v2: add reminder about why 48b PPGTT Reported-by: Chris Wilson Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-14-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-13-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 64 ++++++++++++++++++++++++----- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + 2 files changed, 54 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 79ba485c5d42..7eae6ab8c5fd 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -519,22 +519,63 @@ static void fill_page_dma_32(struct i915_address_space *vm, static int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) { - struct page *page; + struct page *page = NULL; dma_addr_t addr; + int order; - page = alloc_page(gfp | __GFP_ZERO); - if (unlikely(!page)) - return -ENOMEM; + /* + * In order to utilize 64K pages for an object with a size < 2M, we will + * need to support a 64K scratch page, given that every 16th entry for a + * page-table operating in 64K mode must point to a properly aligned 64K + * region, including any PTEs which happen to point to scratch. + * + * This is only relevant for the 48b PPGTT where we support + * huge-gtt-pages, see also i915_vma_insert(). + * + * TODO: we should really consider write-protecting the scratch-page and + * sharing between ppgtt + */ + if (i915_vm_is_48bit(vm) && + HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) { + order = get_order(I915_GTT_PAGE_SIZE_64K); + page = alloc_pages(gfp | __GFP_ZERO, order); + if (page) { + addr = dma_map_page(vm->dma, page, 0, + I915_GTT_PAGE_SIZE_64K, + PCI_DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(vm->dma, addr))) { + __free_pages(page, order); + page = NULL; + } - addr = dma_map_page(vm->dma, page, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL); - if (unlikely(dma_mapping_error(vm->dma, addr))) { - __free_page(page); - return -ENOMEM; + if (!IS_ALIGNED(addr, I915_GTT_PAGE_SIZE_64K)) { + dma_unmap_page(vm->dma, addr, + I915_GTT_PAGE_SIZE_64K, + PCI_DMA_BIDIRECTIONAL); + __free_pages(page, order); + page = NULL; + } + } + } + + if (!page) { + order = 0; + page = alloc_page(gfp | __GFP_ZERO); + if (unlikely(!page)) + return -ENOMEM; + + addr = dma_map_page(vm->dma, page, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(vm->dma, addr))) { + __free_page(page); + return -ENOMEM; + } } vm->scratch_page.page = page; vm->scratch_page.daddr = addr; + vm->scratch_page.order = order; + return 0; } @@ -542,8 +583,9 @@ static void cleanup_scratch_page(struct i915_address_space *vm) { struct i915_page_dma *p = &vm->scratch_page; - dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - __free_page(p->page); + dma_unmap_page(vm->dma, p->daddr, BIT(p->order) << PAGE_SHIFT, + PCI_DMA_BIDIRECTIONAL); + __free_pages(p->page, p->order); } static struct i915_page_table *alloc_pt(struct i915_address_space *vm) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index b9d7036c3665..e9de3f05b0c9 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -215,6 +215,7 @@ struct i915_vma; struct i915_page_dma { struct page *page; + int order; union { dma_addr_t daddr; From 17a00cf73c31cca85531ec409508a2921c077851 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:26 +0100 Subject: [PATCH 078/145] drm/i915: support 64K pages for the 48b PPGTT Support inserting 64K pages into the 48b PPGTT. v2: check for 64K scratch v3: we should only have to re-adjust maybe_64K at every sg interval Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-15-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-14-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 31 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_gtt.h | 7 +++++++ 2 files changed, 38 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7eae6ab8c5fd..118aad90468f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1069,6 +1069,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, struct i915_page_directory_pointer *pdp = pdps[idx.pml4e]; struct i915_page_directory *pd = pdp->page_directory[idx.pdpe]; unsigned int page_size; + bool maybe_64K = false; gen8_pte_t encode = pte_encode; gen8_pte_t *vaddr; u16 index, max; @@ -1090,6 +1091,13 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, max = GEN8_PTES; page_size = I915_GTT_PAGE_SIZE; + if (!index && + vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K && + IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && + (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || + rem >= (max - index) << PAGE_SHIFT)) + maybe_64K = true; + vaddr = kmap_atomic_px(pt); } @@ -1109,12 +1117,35 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, iter->dma = sg_dma_address(iter->sg); iter->max = iter->dma + rem; + if (maybe_64K && index < max && + !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && + (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || + rem >= (max - index) << PAGE_SHIFT))) + maybe_64K = false; + if (unlikely(!IS_ALIGNED(iter->dma, page_size))) break; } } while (rem >= page_size && index < max); kunmap_atomic(vaddr); + + /* + * Is it safe to mark the 2M block as 64K? -- Either we have + * filled whole page-table with 64K entries, or filled part of + * it and have reached the end of the sg table and we have + * enough padding. + */ + if (maybe_64K && + (index == max || + (i915_vm_has_scratch_64K(vma->vm) && + !iter->sg && IS_ALIGNED(vma->node.start + + vma->node.size, + I915_GTT_PAGE_SIZE_2M)))) { + vaddr = kmap_atomic_px(pd); + vaddr[idx.pde] |= GEN8_PDE_IPS_64K; + kunmap_atomic(vaddr); + } } while (iter->sg); } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index e9de3f05b0c9..93211a96fdad 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -154,6 +154,7 @@ typedef u64 gen8_ppgtt_pml4e_t; #define GEN8_PPAT_GET_AGE(x) ((x) & (3 << 4)) #define CHV_PPAT_GET_SNOOP(x) ((x) & (1 << 6)) +#define GEN8_PDE_IPS_64K BIT(11) #define GEN8_PDE_PS_2M BIT(7) struct sg_table; @@ -352,6 +353,12 @@ i915_vm_is_48bit(const struct i915_address_space *vm) return (vm->total - 1) >> 32; } +static inline bool +i915_vm_has_scratch_64K(struct i915_address_space *vm) +{ + return vm->scratch_page.order == get_order(I915_GTT_PAGE_SIZE_64K); +} + /* The Graphics Translation Table is the way in which GEN hardware translates a * Graphics Virtual Address into a Physical Address. In addition to the normal * collateral associated with any va->pa translations GEN hardware also has a From d9ec12f8e302731df6482d85a9c1f25b84a43de5 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:27 +0100 Subject: [PATCH 079/145] drm/i915: accurate page size tracking for the ppgtt Now that we support multiple page sizes for the ppgtt, it would be useful to track the real usage for debugging purposes. Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-16-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-15-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 11 +++++++++++ drivers/gpu/drm/i915/i915_gem_object.h | 10 ++++++++++ 2 files changed, 21 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 118aad90468f..4c605785e2b3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1053,6 +1053,8 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx, cache_level); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; } static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, @@ -1145,7 +1147,10 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, vaddr = kmap_atomic_px(pd); vaddr[idx.pde] |= GEN8_PDE_IPS_64K; kunmap_atomic(vaddr); + page_size = I915_GTT_PAGE_SIZE_64K; } + + vma->page_sizes.gtt |= page_size; } while (iter->sg); } @@ -1170,6 +1175,8 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], &iter, &idx, cache_level)) GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; } } @@ -1891,6 +1898,8 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, } } while (1); kunmap_atomic(vaddr); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; } static int gen6_alloc_va_range(struct i915_address_space *vm, @@ -2598,6 +2607,8 @@ static int ggtt_bind_vma(struct i915_vma *vma, vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); intel_runtime_pm_put(i915); + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + /* * Without aliasing PPGTT there's no difference between * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 110672952a1c..e4e6dd93889d 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -169,6 +169,7 @@ struct drm_i915_gem_object { struct sg_table *pages; void *mapping; + /* TODO: whack some of this into the error state */ struct i915_page_sizes { /** * The sg mask of the pages sg_table. i.e the mask of @@ -184,6 +185,15 @@ struct drm_i915_gem_object { * to use opportunistically. */ unsigned int sg; + + /** + * The actual gtt page size usage. Since we can have + * multiple vma associated with this object we need to + * prevent any trampling of state, hence a copy of this + * struct also lives in each vma, therefore the gtt + * value here should only be read/write through the vma. + */ + unsigned int gtt; } page_sizes; struct i915_gem_object_page_iter { From 7393b7ee3a9c19776fd7b42369ea93994173abd9 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:28 +0100 Subject: [PATCH 080/145] drm/i915/debugfs: include some gtt page size metrics Good to know, mostly for debugging purposes. v2: some improvements from Chris Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-17-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-16-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 61 +++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 84ab77c02d3e..f7817c667958 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -119,6 +119,36 @@ static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj) return size; } +static const char * +stringify_page_sizes(unsigned int page_sizes, char *buf, size_t len) +{ + size_t x = 0; + + switch (page_sizes) { + case 0: + return ""; + case I915_GTT_PAGE_SIZE_4K: + return "4K"; + case I915_GTT_PAGE_SIZE_64K: + return "64K"; + case I915_GTT_PAGE_SIZE_2M: + return "2M"; + default: + if (!buf) + return "M"; + + if (page_sizes & I915_GTT_PAGE_SIZE_2M) + x += snprintf(buf + x, len - x, "2M, "); + if (page_sizes & I915_GTT_PAGE_SIZE_64K) + x += snprintf(buf + x, len - x, "64K, "); + if (page_sizes & I915_GTT_PAGE_SIZE_4K) + x += snprintf(buf + x, len - x, "4K, "); + buf[x-2] = '\0'; + + return buf; + } +} + static void describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) { @@ -156,9 +186,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) if (!drm_mm_node_allocated(&vma->node)) continue; - seq_printf(m, " (%sgtt offset: %08llx, size: %08llx", + seq_printf(m, " (%sgtt offset: %08llx, size: %08llx, pages: %s", i915_vma_is_ggtt(vma) ? "g" : "pp", - vma->node.start, vma->node.size); + vma->node.start, vma->node.size, + stringify_page_sizes(vma->page_sizes.gtt, NULL, 0)); if (i915_vma_is_ggtt(vma)) { switch (vma->ggtt_view.type) { case I915_GGTT_VIEW_NORMAL: @@ -403,10 +434,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data) struct drm_i915_private *dev_priv = node_to_i915(m->private); struct drm_device *dev = &dev_priv->drm; struct i915_ggtt *ggtt = &dev_priv->ggtt; - u32 count, mapped_count, purgeable_count, dpy_count; - u64 size, mapped_size, purgeable_size, dpy_size; + u32 count, mapped_count, purgeable_count, dpy_count, huge_count; + u64 size, mapped_size, purgeable_size, dpy_size, huge_size; struct drm_i915_gem_object *obj; + unsigned int page_sizes = 0; struct drm_file *file; + char buf[80]; int ret; ret = mutex_lock_interruptible(&dev->struct_mutex); @@ -420,6 +453,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) size = count = 0; mapped_size = mapped_count = 0; purgeable_size = purgeable_count = 0; + huge_size = huge_count = 0; list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) { size += obj->base.size; ++count; @@ -433,6 +467,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data) mapped_count++; mapped_size += obj->base.size; } + + if (obj->mm.page_sizes.sg > I915_GTT_PAGE_SIZE) { + huge_count++; + huge_size += obj->base.size; + page_sizes |= obj->mm.page_sizes.sg; + } } seq_printf(m, "%u unbound objects, %llu bytes\n", count, size); @@ -455,6 +495,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data) mapped_count++; mapped_size += obj->base.size; } + + if (obj->mm.page_sizes.sg > I915_GTT_PAGE_SIZE) { + huge_count++; + huge_size += obj->base.size; + page_sizes |= obj->mm.page_sizes.sg; + } } seq_printf(m, "%u bound objects, %llu bytes\n", count, size); @@ -462,11 +508,18 @@ static int i915_gem_object_info(struct seq_file *m, void *data) purgeable_count, purgeable_size); seq_printf(m, "%u mapped objects, %llu bytes\n", mapped_count, mapped_size); + seq_printf(m, "%u huge-paged objects (%s) %llu bytes\n", + huge_count, + stringify_page_sizes(page_sizes, buf, sizeof(buf)), + huge_size); seq_printf(m, "%u display objects (pinned), %llu bytes\n", dpy_count, dpy_size); seq_printf(m, "%llu [%llu] gtt total\n", ggtt->base.total, ggtt->mappable_end); + seq_printf(m, "Supported page sizes: %s\n", + stringify_page_sizes(INTEL_INFO(dev_priv)->page_sizes, + buf, sizeof(buf))); seq_putc(m, '\n'); print_batch_pool_stats(m, dev_priv); From 4049866f0913110bf7de597c1177de7a1cc459cb Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:29 +0100 Subject: [PATCH 081/145] drm/i915/selftests: huge page tests v2: mock test page support configurations and add MI_STORE_DWORD test v3: run all mockable huge page tests on all platforms via the mock_device v4: add pin_update regression test various improvements suggested by Chris v5: fix issues reported by kbuild test single sg spanning multiple page sizes don't explode when running the live-tests through the appgtt v6: lots of improvements from Chris v7: run on each engine for igt_write_huge add simple tmpfs fallback test v8: size_t is bad don't break the i386 build Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-18-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-17-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 1 + drivers/gpu/drm/i915/i915_gem_object.h | 2 + drivers/gpu/drm/i915/selftests/huge_pages.c | 1715 +++++++++++++++++ .../drm/i915/selftests/i915_live_selftests.h | 1 + .../drm/i915/selftests/i915_mock_selftests.h | 1 + 5 files changed, 1720 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/huge_pages.c diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 34398696824c..f8c3ac1c8c67 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5412,6 +5412,7 @@ err_unlock: #include "selftests/scatterlist.c" #include "selftests/mock_gem_device.c" #include "selftests/huge_gem_object.c" +#include "selftests/huge_pages.c" #include "selftests/i915_gem_object.c" #include "selftests/i915_gem_coherency.c" #endif diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index e4e6dd93889d..956c911c2cbf 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -196,6 +196,8 @@ struct drm_i915_gem_object { unsigned int gtt; } page_sizes; + I915_SELFTEST_DECLARE(unsigned int page_mask); + struct i915_gem_object_page_iter { struct scatterlist *sg_pos; unsigned int sg_idx; /* in pages, but 32bit eek! */ diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c new file mode 100644 index 000000000000..b8495882e5b0 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -0,0 +1,1715 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include + +#include "mock_drm.h" + +static const unsigned int page_sizes[] = { + I915_GTT_PAGE_SIZE_2M, + I915_GTT_PAGE_SIZE_64K, + I915_GTT_PAGE_SIZE_4K, +}; + +static unsigned int get_largest_page_size(struct drm_i915_private *i915, + u64 rem) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) { + unsigned int page_size = page_sizes[i]; + + if (HAS_PAGE_SIZES(i915, page_size) && rem >= page_size) + return page_size; + } + + return 0; +} + +static void huge_pages_free_pages(struct sg_table *st) +{ + struct scatterlist *sg; + + for (sg = st->sgl; sg; sg = __sg_next(sg)) { + if (sg_page(sg)) + __free_pages(sg_page(sg), get_order(sg->length)); + } + + sg_free_table(st); + kfree(st); +} + +static int get_huge_pages(struct drm_i915_gem_object *obj) +{ +#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) + unsigned int page_mask = obj->mm.page_mask; + struct sg_table *st; + struct scatterlist *sg; + unsigned int sg_mask; + u64 rem; + + st = kmalloc(sizeof(*st), GFP); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) { + kfree(st); + return -ENOMEM; + } + + rem = obj->base.size; + sg = st->sgl; + st->nents = 0; + sg_mask = 0; + + /* + * Our goal here is simple, we want to greedily fill the object from + * largest to smallest page-size, while ensuring that we use *every* + * page-size as per the given page-mask. + */ + do { + unsigned int bit = ilog2(page_mask); + unsigned int page_size = BIT(bit); + int order = get_order(page_size); + + do { + struct page *page; + + GEM_BUG_ON(order >= MAX_ORDER); + page = alloc_pages(GFP | __GFP_ZERO, order); + if (!page) + goto err; + + sg_set_page(sg, page, page_size, 0); + sg_mask |= page_size; + st->nents++; + + rem -= page_size; + if (!rem) { + sg_mark_end(sg); + break; + } + + sg = __sg_next(sg); + } while ((rem - ((page_size-1) & page_mask)) >= page_size); + + page_mask &= (page_size-1); + } while (page_mask); + + if (i915_gem_gtt_prepare_pages(obj, st)) + goto err; + + obj->mm.madv = I915_MADV_DONTNEED; + + GEM_BUG_ON(sg_mask != obj->mm.page_mask); + __i915_gem_object_set_pages(obj, st, sg_mask); + + return 0; + +err: + sg_set_page(sg, NULL, 0, 0); + sg_mark_end(sg); + huge_pages_free_pages(st); + + return -ENOMEM; +} + +static void put_huge_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + i915_gem_gtt_finish_pages(obj, pages); + huge_pages_free_pages(pages); + + obj->mm.dirty = false; + obj->mm.madv = I915_MADV_WILLNEED; +} + +static const struct drm_i915_gem_object_ops huge_page_ops = { + .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = get_huge_pages, + .put_pages = put_huge_pages, +}; + +static struct drm_i915_gem_object * +huge_pages_object(struct drm_i915_private *i915, + u64 size, + unsigned int page_mask) +{ + struct drm_i915_gem_object *obj; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask)))); + + if (size >> PAGE_SHIFT > INT_MAX) + return ERR_PTR(-E2BIG); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(i915); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &huge_page_ops); + + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = I915_CACHE_NONE; + + obj->mm.page_mask = page_mask; + + return obj; +} + +static int fake_get_huge_pages(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + const u64 max_len = rounddown_pow_of_two(UINT_MAX); + struct sg_table *st; + struct scatterlist *sg; + unsigned int sg_mask; + u64 rem; + + st = kmalloc(sizeof(*st), GFP); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) { + kfree(st); + return -ENOMEM; + } + + /* Use optimal page sized chunks to fill in the sg table */ + rem = obj->base.size; + sg = st->sgl; + st->nents = 0; + sg_mask = 0; + do { + unsigned int page_size = get_largest_page_size(i915, rem); + unsigned int len = min(page_size * div_u64(rem, page_size), + max_len); + + GEM_BUG_ON(!page_size); + + sg->offset = 0; + sg->length = len; + sg_dma_len(sg) = len; + sg_dma_address(sg) = page_size; + + sg_mask |= len; + + st->nents++; + + rem -= len; + if (!rem) { + sg_mark_end(sg); + break; + } + + sg = sg_next(sg); + } while (1); + + obj->mm.madv = I915_MADV_DONTNEED; + + __i915_gem_object_set_pages(obj, st, sg_mask); + + return 0; +} + +static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct sg_table *st; + struct scatterlist *sg; + unsigned int page_size; + + st = kmalloc(sizeof(*st), GFP); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, 1, GFP)) { + kfree(st); + return -ENOMEM; + } + + sg = st->sgl; + st->nents = 1; + + page_size = get_largest_page_size(i915, obj->base.size); + GEM_BUG_ON(!page_size); + + sg->offset = 0; + sg->length = obj->base.size; + sg_dma_len(sg) = obj->base.size; + sg_dma_address(sg) = page_size; + + obj->mm.madv = I915_MADV_DONTNEED; + + __i915_gem_object_set_pages(obj, st, sg->length); + + return 0; +#undef GFP +} + +static void fake_free_huge_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + sg_free_table(pages); + kfree(pages); +} + +static void fake_put_huge_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + fake_free_huge_pages(obj, pages); + obj->mm.dirty = false; + obj->mm.madv = I915_MADV_WILLNEED; +} + +static const struct drm_i915_gem_object_ops fake_ops = { + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = fake_get_huge_pages, + .put_pages = fake_put_huge_pages, +}; + +static const struct drm_i915_gem_object_ops fake_ops_single = { + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = fake_get_huge_pages_single, + .put_pages = fake_put_huge_pages, +}; + +static struct drm_i915_gem_object * +fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) +{ + struct drm_i915_gem_object *obj; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); + + if (size >> PAGE_SHIFT > UINT_MAX) + return ERR_PTR(-E2BIG); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(i915); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + + if (single) + i915_gem_object_init(obj, &fake_ops_single); + else + i915_gem_object_init(obj, &fake_ops); + + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = I915_CACHE_NONE; + + return obj; +} + +static int igt_check_page_sizes(struct i915_vma *vma) +{ + struct drm_i915_private *i915 = to_i915(vma->obj->base.dev); + unsigned int supported = INTEL_INFO(i915)->page_sizes; + struct drm_i915_gem_object *obj = vma->obj; + int err = 0; + + if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) { + pr_err("unsupported page_sizes.sg=%u, supported=%u\n", + vma->page_sizes.sg & ~supported, supported); + err = -EINVAL; + } + + if (!HAS_PAGE_SIZES(i915, vma->page_sizes.gtt)) { + pr_err("unsupported page_sizes.gtt=%u, supported=%u\n", + vma->page_sizes.gtt & ~supported, supported); + err = -EINVAL; + } + + if (vma->page_sizes.phys != obj->mm.page_sizes.phys) { + pr_err("vma->page_sizes.phys(%u) != obj->mm.page_sizes.phys(%u)\n", + vma->page_sizes.phys, obj->mm.page_sizes.phys); + err = -EINVAL; + } + + if (vma->page_sizes.sg != obj->mm.page_sizes.sg) { + pr_err("vma->page_sizes.sg(%u) != obj->mm.page_sizes.sg(%u)\n", + vma->page_sizes.sg, obj->mm.page_sizes.sg); + err = -EINVAL; + } + + if (obj->mm.page_sizes.gtt) { + pr_err("obj->page_sizes.gtt(%u) should never be set\n", + obj->mm.page_sizes.gtt); + err = -EINVAL; + } + + return err; +} + +static int igt_mock_exhaust_device_supported_pages(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->base.i915; + unsigned int saved_mask = INTEL_INFO(i915)->page_sizes; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int i, j, single; + int err; + + /* + * Sanity check creating objects with every valid page support + * combination for our mock device. + */ + + for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) { + unsigned int combination = 0; + + for (j = 0; j < ARRAY_SIZE(page_sizes); j++) { + if (i & BIT(j)) + combination |= page_sizes[j]; + } + + mkwrite_device_info(i915)->page_sizes = combination; + + for (single = 0; single <= 1; ++single) { + obj = fake_huge_pages_object(i915, combination, !!single); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_device; + } + + if (obj->base.size != combination) { + pr_err("obj->base.size=%zu, expected=%u\n", + obj->base.size, combination); + err = -EINVAL; + goto out_put; + } + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_close; + + err = igt_check_page_sizes(vma); + + if (vma->page_sizes.sg != combination) { + pr_err("page_sizes.sg=%u, expected=%u\n", + vma->page_sizes.sg, combination); + err = -EINVAL; + } + + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_gem_object_put(obj); + + if (err) + goto out_device; + } + } + + goto out_device; + +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); +out_device: + mkwrite_device_info(i915)->page_sizes = saved_mask; + + return err; +} + +static int igt_mock_ppgtt_misaligned_dma(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->base.i915; + unsigned long supported = INTEL_INFO(i915)->page_sizes; + struct drm_i915_gem_object *obj; + int bit; + int err; + + /* + * Sanity check dma misalignment for huge pages -- the dma addresses we + * insert into the paging structures need to always respect the page + * size alignment. + */ + + bit = ilog2(I915_GTT_PAGE_SIZE_64K); + + for_each_set_bit_from(bit, &supported, + ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { + IGT_TIMEOUT(end_time); + unsigned int page_size = BIT(bit); + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + unsigned int offset; + unsigned int size = + round_up(page_size, I915_GTT_PAGE_SIZE_2M) << 1; + struct i915_vma *vma; + + obj = fake_huge_pages_object(i915, size, true); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + if (obj->base.size != size) { + pr_err("obj->base.size=%zu, expected=%u\n", + obj->base.size, size); + err = -EINVAL; + goto out_put; + } + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + /* Force the page size for this object */ + obj->mm.page_sizes.sg = page_size; + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_unpin; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + + err = igt_check_page_sizes(vma); + + if (vma->page_sizes.gtt != page_size) { + pr_err("page_sizes.gtt=%u, expected %u\n", + vma->page_sizes.gtt, page_size); + err = -EINVAL; + } + + i915_vma_unpin(vma); + + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + /* + * Try all the other valid offsets until the next + * boundary -- should always fall back to using 4K + * pages. + */ + for (offset = 4096; offset < page_size; offset += 4096) { + err = i915_vma_unbind(vma); + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + err = i915_vma_pin(vma, 0, 0, flags | offset); + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + err = igt_check_page_sizes(vma); + + if (vma->page_sizes.gtt != I915_GTT_PAGE_SIZE_4K) { + pr_err("page_sizes.gtt=%u, expected %lu\n", + vma->page_sizes.gtt, I915_GTT_PAGE_SIZE_4K); + err = -EINVAL; + } + + i915_vma_unpin(vma); + + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + if (igt_timeout(end_time, + "%s timed out at offset %x with page-size %x\n", + __func__, offset, page_size)) + break; + } + + i915_vma_close(vma); + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static void close_object_list(struct list_head *objects, + struct i915_hw_ppgtt *ppgtt) +{ + struct drm_i915_gem_object *obj, *on; + + list_for_each_entry_safe(obj, on, objects, st_link) { + struct i915_vma *vma; + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (!IS_ERR(vma)) + i915_vma_close(vma); + + list_del(&obj->st_link); + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } +} + +static int igt_mock_ppgtt_huge_fill(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->base.i915; + unsigned long max_pages = ppgtt->base.total >> PAGE_SHIFT; + unsigned long page_num; + bool single = false; + LIST_HEAD(objects); + IGT_TIMEOUT(end_time); + int err; + + for_each_prime_number_from(page_num, 1, max_pages) { + struct drm_i915_gem_object *obj; + u64 size = page_num << PAGE_SHIFT; + struct i915_vma *vma; + unsigned int expected_gtt = 0; + int i; + + obj = fake_huge_pages_object(i915, size, single); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + break; + } + + if (obj->base.size != size) { + pr_err("obj->base.size=%zd, expected=%llu\n", + obj->base.size, size); + i915_gem_object_put(obj); + err = -EINVAL; + break; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + break; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + break; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + break; + + err = igt_check_page_sizes(vma); + if (err) { + i915_vma_unpin(vma); + break; + } + + /* + * Figure out the expected gtt page size knowing that we go from + * largest to smallest page size sg chunks, and that we align to + * the largest page size. + */ + for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) { + unsigned int page_size = page_sizes[i]; + + if (HAS_PAGE_SIZES(i915, page_size) && + size >= page_size) { + expected_gtt |= page_size; + size &= page_size-1; + } + } + + GEM_BUG_ON(!expected_gtt); + GEM_BUG_ON(size); + + if (expected_gtt & I915_GTT_PAGE_SIZE_4K) + expected_gtt &= ~I915_GTT_PAGE_SIZE_64K; + + i915_vma_unpin(vma); + + if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { + if (!IS_ALIGNED(vma->node.start, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.start(%llx) not aligned to 2M\n", + vma->node.start); + err = -EINVAL; + break; + } + + if (!IS_ALIGNED(vma->node.size, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.size(%llx) not aligned to 2M\n", + vma->node.size); + err = -EINVAL; + break; + } + } + + if (vma->page_sizes.gtt != expected_gtt) { + pr_err("gtt=%u, expected=%u, size=%zd, single=%s\n", + vma->page_sizes.gtt, expected_gtt, + obj->base.size, yesno(!!single)); + err = -EINVAL; + break; + } + + if (igt_timeout(end_time, + "%s timed out at size %zd\n", + __func__, obj->base.size)) + break; + + single = !single; + } + + close_object_list(&objects, ppgtt); + + if (err == -ENOMEM || err == -ENOSPC) + err = 0; + + return err; +} + +static int igt_mock_ppgtt_64K(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->base.i915; + struct drm_i915_gem_object *obj; + const struct object_info { + unsigned int size; + unsigned int gtt; + unsigned int offset; + } objects[] = { + /* Cases with forced padding/alignment */ + { + .size = SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + { + .size = SZ_64K + SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_64K - SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_2M, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + { + .size = SZ_2M - SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_2M + SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_2M + SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + { + .size = SZ_2M - SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + /* Try without any forced padding/alignment */ + { + .size = SZ_64K, + .offset = SZ_2M, + .gtt = I915_GTT_PAGE_SIZE_4K, + }, + { + .size = SZ_128K, + .offset = SZ_2M - SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_4K, + }, + }; + struct i915_vma *vma; + int i, single; + int err; + + /* + * Sanity check some of the trickiness with 64K pages -- either we can + * safely mark the whole page-table(2M block) as 64K, or we have to + * always fallback to 4K. + */ + + if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K)) + return 0; + + for (i = 0; i < ARRAY_SIZE(objects); ++i) { + unsigned int size = objects[i].size; + unsigned int expected_gtt = objects[i].gtt; + unsigned int offset = objects[i].offset; + unsigned int flags = PIN_USER; + + for (single = 0; single <= 1; single++) { + obj = fake_huge_pages_object(i915, size, !!single); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_object_put; + + /* + * Disable 2M pages -- We only want to use 64K/4K pages + * for this test. + */ + obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M; + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_object_unpin; + } + + if (offset) + flags |= PIN_OFFSET_FIXED | offset; + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_vma_close; + + err = igt_check_page_sizes(vma); + if (err) + goto out_vma_unpin; + + if (!offset && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { + if (!IS_ALIGNED(vma->node.start, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.start(%llx) not aligned to 2M\n", + vma->node.start); + err = -EINVAL; + goto out_vma_unpin; + } + + if (!IS_ALIGNED(vma->node.size, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.size(%llx) not aligned to 2M\n", + vma->node.size); + err = -EINVAL; + goto out_vma_unpin; + } + } + + if (vma->page_sizes.gtt != expected_gtt) { + pr_err("gtt=%u, expected=%u, i=%d, single=%s\n", + vma->page_sizes.gtt, expected_gtt, i, + yesno(!!single)); + err = -EINVAL; + goto out_vma_unpin; + } + + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + } + + return 0; + +out_vma_unpin: + i915_vma_unpin(vma); +out_vma_close: + i915_vma_close(vma); +out_object_unpin: + i915_gem_object_unpin_pages(obj); +out_object_put: + i915_gem_object_put(obj); + + return err; +} + +static struct i915_vma * +gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val) +{ + struct drm_i915_private *i915 = to_i915(vma->obj->base.dev); + const int gen = INTEL_GEN(vma->vm->i915); + unsigned int count = vma->size >> PAGE_SHIFT; + struct drm_i915_gem_object *obj; + struct i915_vma *batch; + unsigned int size; + u32 *cmd; + int n; + int err; + + size = (1 + 4 * count) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + offset += vma->node.start; + + for (n = 0; n < count; n++) { + if (gen >= 8) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = val; + } else if (gen >= 4) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4 | + (gen < 6 ? 1 << 22 : 0); + *cmd++ = 0; + *cmd++ = offset; + *cmd++ = val; + } else { + *cmd++ = MI_STORE_DWORD_IMM | 1 << 22; + *cmd++ = offset; + *cmd++ = val; + } + + offset += PAGE_SIZE; + } + + *cmd = MI_BATCH_BUFFER_END; + + i915_gem_object_unpin_map(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + goto err; + + batch = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err; + } + + err = i915_vma_pin(batch, 0, 0, PIN_USER); + if (err) + goto err; + + return batch; + +err: + i915_gem_object_put(obj); + + return ERR_PTR(err); +} + +static int gpu_write(struct i915_vma *vma, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + u32 dword, + u32 value) +{ + struct drm_i915_gem_request *rq; + struct i915_vma *batch; + int flags = 0; + int err; + + GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + + err = i915_gem_object_set_to_gtt_domain(vma->obj, true); + if (err) + return err; + + rq = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + batch = gpu_write_dw(vma, dword * sizeof(u32), value); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_request; + } + + i915_vma_move_to_active(batch, rq, 0); + i915_gem_object_set_active_reference(batch->obj); + i915_vma_unpin(batch); + i915_vma_close(batch); + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto err_request; + + err = i915_switch_context(rq); + if (err) + goto err_request; + + err = rq->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); + if (err) + goto err_request; + + i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + + reservation_object_lock(vma->resv, NULL); + reservation_object_add_excl_fence(vma->resv, &rq->fence); + reservation_object_unlock(vma->resv); + +err_request: + __i915_add_request(rq, err == 0); + + return err; +} + +static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) +{ + unsigned int needs_flush; + unsigned long n; + int err; + + err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush); + if (err) + return err; + + for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { + u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n)); + + if (needs_flush & CLFLUSH_BEFORE) + drm_clflush_virt_range(ptr, PAGE_SIZE); + + if (ptr[dword] != val) { + pr_err("n=%lu ptr[%u]=%u, val=%u\n", + n, dword, ptr[dword], val); + kunmap_atomic(ptr); + err = -EINVAL; + break; + } + + kunmap_atomic(ptr); + } + + i915_gem_obj_finish_shmem_access(obj); + + return err; +} + +static int igt_write_huge(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_gem_context *ctx = i915->kernel_context; + struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct intel_engine_cs *engine; + struct i915_vma *vma; + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + unsigned int max_page_size; + unsigned int id; + u64 max; + u64 num; + u64 size; + int err = 0; + + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + + size = obj->base.size; + if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) + size = round_up(size, I915_GTT_PAGE_SIZE_2M); + + max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); + max = div_u64((vm->total - size), max_page_size); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + + if (!intel_engine_can_store_dword(engine)) { + pr_info("store-dword-imm not supported on engine=%u\n", + id); + continue; + } + + /* + * Try various offsets until we timeout -- we want to avoid + * issues hidden by effectively always using offset = 0. + */ + for_each_prime_number_from(num, 0, max) { + u64 offset = num * max_page_size; + u32 dword; + + err = i915_vma_unbind(vma); + if (err) + goto out_vma_close; + + err = i915_vma_pin(vma, size, max_page_size, flags | offset); + if (err) { + /* + * The ggtt may have some pages reserved so + * refrain from erroring out. + */ + if (err == -ENOSPC && i915_is_ggtt(vm)) { + err = 0; + continue; + } + + goto out_vma_close; + } + + err = igt_check_page_sizes(vma); + if (err) + goto out_vma_unpin; + + dword = offset_in_page(num) / 4; + + err = gpu_write(vma, ctx, engine, dword, num + 1); + if (err) { + pr_err("gpu-write failed at offset=%llx", offset); + goto out_vma_unpin; + } + + err = cpu_check(obj, dword, num + 1); + if (err) { + pr_err("cpu-check failed at offset=%llx", offset); + goto out_vma_unpin; + } + + i915_vma_unpin(vma); + + if (num > 0 && + igt_timeout(end_time, + "%s timed out on engine=%u at offset=%llx, max_page_size=%x\n", + __func__, id, offset, max_page_size)) + break; + } + } + +out_vma_unpin: + if (i915_vma_is_pinned(vma)) + i915_vma_unpin(vma); +out_vma_close: + i915_vma_close(vma); + + return err; +} + +static int igt_ppgtt_exhaust_huge(void *arg) +{ + struct drm_i915_private *i915 = arg; + unsigned long supported = INTEL_INFO(i915)->page_sizes; + static unsigned int pages[ARRAY_SIZE(page_sizes)]; + struct drm_i915_gem_object *obj; + unsigned int size_mask; + unsigned int page_mask; + int n, i; + int err; + + /* + * Sanity check creating objects with a varying mix of page sizes -- + * ensuring that our writes lands in the right place. + */ + + n = 0; + for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) + pages[n++] = BIT(i); + + for (size_mask = 2; size_mask < BIT(n); size_mask++) { + unsigned int size = 0; + + for (i = 0; i < n; i++) { + if (size_mask & BIT(i)) + size |= pages[i]; + } + + /* + * For our page mask we want to enumerate all the page-size + * combinations which will fit into our chosen object size. + */ + for (page_mask = 2; page_mask <= size_mask; page_mask++) { + unsigned int page_sizes = 0; + + for (i = 0; i < n; i++) { + if (page_mask & BIT(i)) + page_sizes |= pages[i]; + } + + /* + * Ensure that we can actually fill the given object + * with our chosen page mask. + */ + if (!IS_ALIGNED(size, BIT(__ffs(page_sizes)))) + continue; + + obj = huge_pages_object(i915, size, page_sizes); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_device; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + + if (err == -ENOMEM) { + pr_info("unable to get pages, size=%u, pages=%u\n", + size, page_sizes); + err = 0; + break; + } + + pr_err("pin_pages failed, size=%u, pages=%u\n", + size_mask, page_mask); + + goto out_device; + } + + /* Force the page-size for the gtt insertion */ + obj->mm.page_sizes.sg = page_sizes; + + err = igt_write_huge(obj); + if (err) { + pr_err("exhaust write-huge failed with size=%u\n", + size); + goto out_unpin; + } + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + } + + goto out_device; + +out_unpin: + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); +out_device: + mkwrite_device_info(i915)->page_sizes = supported; + + return err; +} + +static int igt_ppgtt_internal_huge(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + static const unsigned int sizes[] = { + SZ_64K, + SZ_128K, + SZ_256K, + SZ_512K, + SZ_1M, + SZ_2M, + }; + int i; + int err; + + /* + * Sanity check that the HW uses huge pages correctly through internal + * -- ensure that our writes land in the right place. + */ + + for (i = 0; i < ARRAY_SIZE(sizes); ++i) { + unsigned int size = sizes[i]; + + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) { + pr_info("internal unable to allocate huge-page(s) with size=%u\n", + size); + goto out_unpin; + } + + err = igt_write_huge(obj); + if (err) { + pr_err("internal write-huge failed with size=%u\n", + size); + goto out_unpin; + } + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static inline bool igt_can_allocate_thp(struct drm_i915_private *i915) +{ + return i915->mm.gemfs && has_transparent_hugepage(); +} + +static int igt_ppgtt_gemfs_huge(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + static const unsigned int sizes[] = { + SZ_2M, + SZ_4M, + SZ_8M, + SZ_16M, + SZ_32M, + }; + int i; + int err; + + /* + * Sanity check that the HW uses huge pages correctly through gemfs -- + * ensure that our writes land in the right place. + */ + + if (!igt_can_allocate_thp(i915)) { + pr_info("missing THP support, skipping\n"); + return 0; + } + + for (i = 0; i < ARRAY_SIZE(sizes); ++i) { + unsigned int size = sizes[i]; + + obj = i915_gem_object_create(i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { + pr_info("finishing test early, gemfs unable to allocate huge-page(s) with size=%u\n", + size); + goto out_unpin; + } + + err = igt_write_huge(obj); + if (err) { + pr_err("gemfs write-huge failed with size=%u\n", + size); + goto out_unpin; + } + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static int igt_ppgtt_pin_update(void *arg) +{ + struct drm_i915_private *dev_priv = arg; + unsigned long supported = INTEL_INFO(dev_priv)->page_sizes; + struct i915_hw_ppgtt *ppgtt = dev_priv->kernel_context->ppgtt; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + int first, last; + int err; + + /* + * Make sure there's no funny business when doing a PIN_UPDATE -- in the + * past we had a subtle issue with being able to incorrectly do multiple + * alloc va ranges on the same object when doing a PIN_UPDATE, which + * resulted in some pretty nasty bugs, though only when using + * huge-gtt-pages. + */ + + if (!USES_FULL_48BIT_PPGTT(dev_priv)) { + pr_info("48b PPGTT not supported, skipping\n"); + return 0; + } + + first = ilog2(I915_GTT_PAGE_SIZE_64K); + last = ilog2(I915_GTT_PAGE_SIZE_2M); + + for_each_set_bit_from(first, &supported, last + 1) { + unsigned int page_size = BIT(first); + + obj = i915_gem_object_create_internal(dev_priv, page_size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, SZ_2M, 0, flags); + if (err) + goto out_close; + + if (vma->page_sizes.sg < page_size) { + pr_info("Unable to allocate page-size %x, finishing test early\n", + page_size); + goto out_unpin; + } + + err = igt_check_page_sizes(vma); + if (err) + goto out_unpin; + + if (vma->page_sizes.gtt != page_size) { + dma_addr_t addr = i915_gem_object_get_dma_address(obj, 0); + + /* + * The only valid reason for this to ever fail would be + * if the dma-mapper screwed us over when we did the + * dma_map_sg(), since it has the final say over the dma + * address. + */ + if (IS_ALIGNED(addr, page_size)) { + pr_err("page_sizes.gtt=%u, expected=%u\n", + vma->page_sizes.gtt, page_size); + err = -EINVAL; + } else { + pr_info("dma address misaligned, finishing test early\n"); + } + + goto out_unpin; + } + + err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE); + if (err) + goto out_unpin; + + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_gem_object_put(obj); + } + + obj = i915_gem_object_create_internal(dev_priv, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_close; + + /* + * Make sure we don't end up with something like where the pde is still + * pointing to the 2M page, and the pt we just filled-in is dangling -- + * we can check this by writing to the first page where it would then + * land in the now stale 2M page. + */ + + err = gpu_write(vma, dev_priv->kernel_context, dev_priv->engine[RCS], + 0, 0xdeadbeaf); + if (err) + goto out_unpin; + + err = cpu_check(obj, 0, 0xdeadbeaf); + +out_unpin: + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static int igt_tmpfs_fallback(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct vfsmount *gemfs = i915->mm.gemfs; + struct i915_gem_context *ctx = i915->kernel_context; + struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *vaddr; + int err = 0; + + /* + * Make sure that we don't burst into a ball of flames upon falling back + * to tmpfs, which we rely on if on the off-chance we encouter a failure + * when setting up gemfs. + */ + + i915->mm.gemfs = NULL; + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_restore; + } + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out_put; + } + *vaddr = 0xdeadbeaf; + + i915_gem_object_unpin_map(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_close; + + err = igt_check_page_sizes(vma); + + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); +out_restore: + i915->mm.gemfs = gemfs; + + return err; +} + +static int igt_shrink_thp(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx = i915->kernel_context; + struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int flags = PIN_USER; + int err; + + /* + * Sanity check shrinking huge-paged object -- make sure nothing blows + * up. + */ + + if (!igt_can_allocate_thp(i915)) { + pr_info("missing THP support, skipping\n"); + return 0; + } + + obj = i915_gem_object_create(i915, SZ_2M); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_close; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { + pr_info("failed to allocate THP, finishing test early\n"); + goto out_unpin; + } + + err = igt_check_page_sizes(vma); + if (err) + goto out_unpin; + + err = gpu_write(vma, i915->kernel_context, i915->engine[RCS], 0, + 0xdeadbeaf); + if (err) + goto out_unpin; + + i915_vma_unpin(vma); + + /* + * Now that the pages are *unpinned* shrink-all should invoke + * shmem to truncate our pages. + */ + i915_gem_shrink_all(i915); + if (!IS_ERR_OR_NULL(obj->mm.pages)) { + pr_err("shrink-all didn't truncate the pages\n"); + err = -EINVAL; + goto out_close; + } + + if (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys) { + pr_err("residual page-size bits left\n"); + err = -EINVAL; + goto out_close; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_close; + + err = cpu_check(obj, 0, 0xdeadbeaf); + +out_unpin: + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); + + return err; +} + +int i915_gem_huge_page_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_mock_exhaust_device_supported_pages), + SUBTEST(igt_mock_ppgtt_misaligned_dma), + SUBTEST(igt_mock_ppgtt_huge_fill), + SUBTEST(igt_mock_ppgtt_64K), + }; + int saved_ppgtt = i915_modparams.enable_ppgtt; + struct drm_i915_private *dev_priv; + struct pci_dev *pdev; + struct i915_hw_ppgtt *ppgtt; + int err; + + dev_priv = mock_gem_device(); + if (!dev_priv) + return -ENOMEM; + + /* Pretend to be a device which supports the 48b PPGTT */ + i915_modparams.enable_ppgtt = 3; + + pdev = dev_priv->drm.pdev; + dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(39)); + + mutex_lock(&dev_priv->drm.struct_mutex); + ppgtt = i915_ppgtt_create(dev_priv, ERR_PTR(-ENODEV), "mock"); + if (IS_ERR(ppgtt)) { + err = PTR_ERR(ppgtt); + goto out_unlock; + } + + if (!i915_vm_is_48bit(&ppgtt->base)) { + pr_err("failed to create 48b PPGTT\n"); + err = -EINVAL; + goto out_close; + } + + /* If we were ever hit this then it's time to mock the 64K scratch */ + if (!i915_vm_has_scratch_64K(&ppgtt->base)) { + pr_err("PPGTT missing 64K scratch page\n"); + err = -EINVAL; + goto out_close; + } + + err = i915_subtests(tests, ppgtt); + +out_close: + i915_ppgtt_close(&ppgtt->base); + i915_ppgtt_put(ppgtt); + +out_unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); + + i915_modparams.enable_ppgtt = saved_ppgtt; + + drm_dev_unref(&dev_priv->drm); + + return err; +} + +int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_shrink_thp), + SUBTEST(igt_ppgtt_pin_update), + SUBTEST(igt_tmpfs_fallback), + SUBTEST(igt_ppgtt_exhaust_huge), + SUBTEST(igt_ppgtt_gemfs_huge), + SUBTEST(igt_ppgtt_internal_huge), + }; + int err; + + if (!USES_PPGTT(dev_priv)) { + pr_info("PPGTT not supported, skipping live-selftests\n"); + return 0; + } + + mutex_lock(&dev_priv->drm.struct_mutex); + err = i915_subtests(tests, dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 18b174d855ca..64acd7eccc5c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -15,5 +15,6 @@ selftest(objects, i915_gem_object_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) selftest(gtt, i915_gem_gtt_live_selftests) +selftest(hugepages, i915_gem_huge_page_live_selftests) selftest(contexts, i915_gem_context_live_selftests) selftest(hangcheck, intel_hangcheck_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index fc74687501ba..9961b44f76ed 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -21,3 +21,4 @@ selftest(dmabuf, i915_gem_dmabuf_mock_selftests) selftest(vma, i915_vma_mock_selftests) selftest(evict, i915_gem_evict_mock_selftests) selftest(gtt, i915_gem_gtt_mock_selftests) +selftest(hugepages, i915_gem_huge_page_mock_selftests) From 7924d9d4dc9ffff32bf099db8c638e11e5457cd7 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:30 +0100 Subject: [PATCH 082/145] drm/i915/selftests: mix huge pages Try to mix sg page sizes for 4K, 64K and 2M pages. v2: s/BIT(x) >> 12/BIT(x) >> PAGE_SHIFT/ Suggested-by: Chris Wilson Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-19-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-18-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/scatterlist.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/scatterlist.c b/drivers/gpu/drm/i915/selftests/scatterlist.c index 1cc5d2931753..cd6d2a16071f 100644 --- a/drivers/gpu/drm/i915/selftests/scatterlist.c +++ b/drivers/gpu/drm/i915/selftests/scatterlist.c @@ -189,6 +189,20 @@ static unsigned int random(unsigned long n, return 1 + (prandom_u32_state(rnd) % 1024); } +static unsigned int random_page_size_pages(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + /* 4K, 64K, 2M */ + static unsigned int page_count[] = { + BIT(12) >> PAGE_SHIFT, + BIT(16) >> PAGE_SHIFT, + BIT(21) >> PAGE_SHIFT, + }; + + return page_count[(prandom_u32_state(rnd) % 3)]; +} + static inline bool page_contiguous(struct page *first, struct page *last, unsigned long npages) @@ -252,6 +266,7 @@ static const npages_fn_t npages_funcs[] = { grow, shrink, random, + random_page_size_pages, NULL, }; From da9fe3f31a920658debd231ff10b0fbe0f2f34ed Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:31 +0100 Subject: [PATCH 083/145] drm/i915: disable platform support for vGPU huge gtt pages Currently gvt gtt handling doesn't support huge page entries, so disable for now. v2: remove useless 48b PPGTT check Suggested-by: Zhenyu Wang Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Zhenyu Wang Reviewed-by: Zhenyu Wang Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-20-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-19-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f8c3ac1c8c67..82a10036fb38 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4822,6 +4822,15 @@ int i915_gem_init(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); + /* + * We need to fallback to 4K pages since gvt gtt handling doesn't + * support huge page entries - we will need to check either hypervisor + * mm can support huge guest page or just do emulation in gvt. + */ + if (intel_vgpu_active(dev_priv)) + mkwrite_device_info(dev_priv)->page_sizes = + I915_GTT_PAGE_SIZE_4K; + dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); if (!i915_modparams.enable_execlists) { From f1f3f98272b9bb1ba87c2ccbc811b038574cec43 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:32 +0100 Subject: [PATCH 084/145] drm/i915: enable platform support for 64K pages For gen9+ enable platform level support for 64K pages. Also enable for mock testing. Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-21-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-20-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_pci.c | 3 ++- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 7938006cf03a..8d349aec1902 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -436,7 +436,8 @@ static const struct intel_device_info intel_cherryview_info __initconst = { }; #define GEN9_DEFAULT_PAGE_SIZES \ - .page_sizes = I915_GTT_PAGE_SIZE_4K + .page_sizes = I915_GTT_PAGE_SIZE_4K | \ + I915_GTT_PAGE_SIZE_64K #define GEN9_FEATURES \ GEN8_FEATURES, \ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index f46c3a35d61a..7a9735dac912 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -175,7 +175,8 @@ struct drm_i915_private *mock_gem_device(void) mkwrite_device_info(i915)->gen = -1; mkwrite_device_info(i915)->page_sizes = - I915_GTT_PAGE_SIZE_4K; + I915_GTT_PAGE_SIZE_4K | + I915_GTT_PAGE_SIZE_64K; spin_lock_init(&i915->mm.object_stat_lock); mock_uncore_init(i915); From a883241c3922000b21b58b5740c55badfe09940f Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:33 +0100 Subject: [PATCH 085/145] drm/i915: enable platform support for 2M pages For gen8+ platforms which support the 48b PPGTT, enable platform level support for 2M pages. Also enable for mock testing. Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-22-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-21-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_pci.c | 6 ++++-- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 8d349aec1902..bf467f30c99b 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -376,7 +376,8 @@ static const struct intel_device_info intel_haswell_gt3_info __initconst = { #define GEN8_FEATURES \ G75_FEATURES, \ BDW_COLORS, \ - GEN_DEFAULT_PAGE_SIZES, \ + .page_sizes = I915_GTT_PAGE_SIZE_4K | \ + I915_GTT_PAGE_SIZE_2M, \ .has_logical_ring_contexts = 1, \ .has_full_48bit_ppgtt = 1, \ .has_64bit_reloc = 1, \ @@ -437,7 +438,8 @@ static const struct intel_device_info intel_cherryview_info __initconst = { #define GEN9_DEFAULT_PAGE_SIZES \ .page_sizes = I915_GTT_PAGE_SIZE_4K | \ - I915_GTT_PAGE_SIZE_64K + I915_GTT_PAGE_SIZE_64K | \ + I915_GTT_PAGE_SIZE_2M #define GEN9_FEATURES \ GEN8_FEATURES, \ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 7a9735dac912..04eb9362f4f8 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -176,7 +176,8 @@ struct drm_i915_private *mock_gem_device(void) mkwrite_device_info(i915)->page_sizes = I915_GTT_PAGE_SIZE_4K | - I915_GTT_PAGE_SIZE_64K; + I915_GTT_PAGE_SIZE_64K | + I915_GTT_PAGE_SIZE_2M; spin_lock_init(&i915->mm.object_stat_lock); mock_uncore_init(i915); From e30a154b5262b967b133b06ac40777e651045898 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 1 Apr 2016 18:37:25 +0300 Subject: [PATCH 086/145] drm/i915: Read timings from the correct transcoder in intel_crtc_mode_get() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_crtc->config->cpu_transcoder isn't yet filled out when intel_crtc_mode_get() gets called during output probing, so we should not use it there. Instead intel_crtc_mode_get() figures out the correct transcoder on its own, and that's what we should use. If the BIOS boots LVDS on pipe B, intel_crtc_mode_get() would actually end up reading the timings from pipe A instead (since PIPE_A==0), which clearly isn't what we want. It looks to me like this may have been broken by commit eccb140bca67 ("drm/i915: hw state readout&check support for cpu_transcoder") as that one removed the early initialization of cpu_transcoder from intel_crtc_init(). Cc: stable@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: Rob Kramer Cc: Daniel Vetter Reported-by: Rob Kramer Fixes: eccb140bca67 ("drm/i915: hw state readout&check support for cpu_transcoder") References: https://lists.freedesktop.org/archives/dri-devel/2016-April/104142.html Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1459525046-19425-1-git-send-email-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9f2bf3b3f759..10501e855756 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10253,13 +10253,10 @@ struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, { struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder; struct drm_display_mode *mode; struct intel_crtc_state *pipe_config; - int htot = I915_READ(HTOTAL(cpu_transcoder)); - int hsync = I915_READ(HSYNC(cpu_transcoder)); - int vtot = I915_READ(VTOTAL(cpu_transcoder)); - int vsync = I915_READ(VSYNC(cpu_transcoder)); + u32 htot, hsync, vtot, vsync; enum pipe pipe = intel_crtc->pipe; mode = kzalloc(sizeof(*mode), GFP_KERNEL); @@ -10287,6 +10284,13 @@ struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, i9xx_crtc_clock_get(intel_crtc, pipe_config); mode->clock = pipe_config->port_clock / pipe_config->pixel_multiplier; + + cpu_transcoder = pipe_config->cpu_transcoder; + htot = I915_READ(HTOTAL(cpu_transcoder)); + hsync = I915_READ(HSYNC(cpu_transcoder)); + vtot = I915_READ(VTOTAL(cpu_transcoder)); + vsync = I915_READ(VSYNC(cpu_transcoder)); + mode->hdisplay = (htot & 0xffff) + 1; mode->htotal = ((htot & 0xffff0000) >> 16) + 1; mode->hsync_start = (hsync & 0xffff) + 1; From d0d37254680fb15fdefe03a0986cfc1e22bcbfc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 1 Apr 2016 21:48:50 +0300 Subject: [PATCH 087/145] drm/i915: Use intel_get_pipe_timings() and intel_mode_from_pipe_config() in intel_crtc_mode_get() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eliminate the duplicate code for pipe timing readout in intel_crtc_mode_get() by using the functions we use for the normal state readout. v2: Store dotclock in adjusted_mode instead of the final mode Cc: dri-devel@lists.freedesktop.org Cc: Rob Kramer Cc: Daniel Vetter Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Tested-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1459536530-17754-1-git-send-email-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 10501e855756..15844bf92434 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10253,10 +10253,8 @@ struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, { struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum transcoder cpu_transcoder; struct drm_display_mode *mode; struct intel_crtc_state *pipe_config; - u32 htot, hsync, vtot, vsync; enum pipe pipe = intel_crtc->pipe; mode = kzalloc(sizeof(*mode), GFP_KERNEL); @@ -10283,24 +10281,12 @@ struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, pipe_config->dpll_hw_state.fp1 = I915_READ(FP1(pipe)); i9xx_crtc_clock_get(intel_crtc, pipe_config); - mode->clock = pipe_config->port_clock / pipe_config->pixel_multiplier; + pipe_config->base.adjusted_mode.crtc_clock = + pipe_config->port_clock / pipe_config->pixel_multiplier; - cpu_transcoder = pipe_config->cpu_transcoder; - htot = I915_READ(HTOTAL(cpu_transcoder)); - hsync = I915_READ(HSYNC(cpu_transcoder)); - vtot = I915_READ(VTOTAL(cpu_transcoder)); - vsync = I915_READ(VSYNC(cpu_transcoder)); + intel_get_pipe_timings(intel_crtc, pipe_config); - mode->hdisplay = (htot & 0xffff) + 1; - mode->htotal = ((htot & 0xffff0000) >> 16) + 1; - mode->hsync_start = (hsync & 0xffff) + 1; - mode->hsync_end = ((hsync & 0xffff0000) >> 16) + 1; - mode->vdisplay = (vtot & 0xffff) + 1; - mode->vtotal = ((vtot & 0xffff0000) >> 16) + 1; - mode->vsync_start = (vsync & 0xffff) + 1; - mode->vsync_end = ((vsync & 0xffff0000) >> 16) + 1; - - drm_mode_set_name(mode); + intel_mode_from_pipe_config(mode, pipe_config); kfree(pipe_config); From bef27bdb6cfbc8c74c776f9455d4d3a4a76a4872 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Oct 2017 10:20:19 +0100 Subject: [PATCH 088/145] drm/i915: Assert we do not try to expand VMA for hugepage inside GGTT We only apply the hugepage PD redirection inside the ppGTT, so during i915_vma_insert() we want to exclude the GGTT from the additional alignment constraints (thereby avoiding the extra GTT pressure from fragmentation). Add an assert to document that intention alongside the comment. v2: After discussion with Matthew, make it a blanket GGTT ban (previously we allowed the expansion for appgtt, and so indirectly ggtt). There are issues we need to fix before allowing the current appgtt to be used with hugepages, and if we do, we probably want more care over when to expand/align, as the mappable aperture inside the ggtt is precious. Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20171009092019.20747-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_vma.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 72e86b32ab41..2bad88cc927b 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -513,6 +513,13 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) rounddown_pow_of_two(vma->page_sizes.sg | I915_GTT_PAGE_SIZE_2M); + /* + * Check we don't expand for the limited Global GTT + * (mappable aperture is even more precious!). This + * also checks that we exclude the aliasing-ppgtt. + */ + GEM_BUG_ON(i915_vma_is_ggtt(vma)); + alignment = max(alignment, page_alignment); if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) From f636edb214a5ffdc533d5c7198a66957322c1b40 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Oct 2017 12:02:57 +0100 Subject: [PATCH 089/145] drm/i915: Make i915_engine_info pretty printer to standalone We can use drm_printer to hide the differences between printk and seq_printf, and so make the i915_engine_info pretty printer able to be called from different contexts and not just debugfs. For instance, I want to use the pretty printer to debug kselftests. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171009110301.21705-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_debugfs.c | 148 +--------------------- drivers/gpu/drm/i915/intel_engine_cs.c | 160 ++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 4 + 3 files changed, 168 insertions(+), 144 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index f7817c667958..9ec2bcd9a695 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3292,9 +3292,9 @@ static int i915_display_info(struct seq_file *m, void *unused) static int i915_engine_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct i915_gpu_error *error = &dev_priv->gpu_error; struct intel_engine_cs *engine; enum intel_engine_id id; + struct drm_printer p; intel_runtime_pm_get(dev_priv); @@ -3303,149 +3303,9 @@ static int i915_engine_info(struct seq_file *m, void *unused) seq_printf(m, "Global active requests: %d\n", dev_priv->gt.active_requests); - for_each_engine(engine, dev_priv, id) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct drm_i915_gem_request *rq; - struct rb_node *rb; - u64 addr; - - seq_printf(m, "%s\n", engine->name); - seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", - intel_engine_get_seqno(engine), - intel_engine_last_submit(engine), - engine->hangcheck.seqno, - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), - engine->timeline->inflight_seqnos); - seq_printf(m, "\tReset count: %d\n", - i915_reset_engine_count(error, engine)); - - rcu_read_lock(); - - seq_printf(m, "\tRequests:\n"); - - rq = list_first_entry(&engine->timeline->requests, - struct drm_i915_gem_request, link); - if (&rq->link != &engine->timeline->requests) - print_request(m, rq, "\t\tfirst "); - - rq = list_last_entry(&engine->timeline->requests, - struct drm_i915_gem_request, link); - if (&rq->link != &engine->timeline->requests) - print_request(m, rq, "\t\tlast "); - - rq = i915_gem_find_active_request(engine); - if (rq) { - print_request(m, rq, "\t\tactive "); - seq_printf(m, - "\t\t[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]\n", - rq->head, rq->postfix, rq->tail, - rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, - rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u); - } - - seq_printf(m, "\tRING_START: 0x%08x [0x%08x]\n", - I915_READ(RING_START(engine->mmio_base)), - rq ? i915_ggtt_offset(rq->ring->vma) : 0); - seq_printf(m, "\tRING_HEAD: 0x%08x [0x%08x]\n", - I915_READ(RING_HEAD(engine->mmio_base)) & HEAD_ADDR, - rq ? rq->ring->head : 0); - seq_printf(m, "\tRING_TAIL: 0x%08x [0x%08x]\n", - I915_READ(RING_TAIL(engine->mmio_base)) & TAIL_ADDR, - rq ? rq->ring->tail : 0); - seq_printf(m, "\tRING_CTL: 0x%08x [%s]\n", - I915_READ(RING_CTL(engine->mmio_base)), - I915_READ(RING_CTL(engine->mmio_base)) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? "waiting" : ""); - - rcu_read_unlock(); - - addr = intel_engine_get_active_head(engine); - seq_printf(m, "\tACTHD: 0x%08x_%08x\n", - upper_32_bits(addr), lower_32_bits(addr)); - addr = intel_engine_get_last_batch_head(engine); - seq_printf(m, "\tBBADDR: 0x%08x_%08x\n", - upper_32_bits(addr), lower_32_bits(addr)); - - if (i915_modparams.enable_execlists) { - const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; - struct intel_engine_execlists * const execlists = &engine->execlists; - u32 ptr, read, write; - unsigned int idx; - - seq_printf(m, "\tExeclist status: 0x%08x %08x\n", - I915_READ(RING_EXECLIST_STATUS_LO(engine)), - I915_READ(RING_EXECLIST_STATUS_HI(engine))); - - ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine)); - read = GEN8_CSB_READ_PTR(ptr); - write = GEN8_CSB_WRITE_PTR(ptr); - seq_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s\n", - read, execlists->csb_head, - write, - intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)), - yesno(test_bit(ENGINE_IRQ_EXECLIST, - &engine->irq_posted))); - if (read >= GEN8_CSB_ENTRIES) - read = 0; - if (write >= GEN8_CSB_ENTRIES) - write = 0; - if (read > write) - write += GEN8_CSB_ENTRIES; - while (read < write) { - idx = ++read % GEN8_CSB_ENTRIES; - seq_printf(m, "\tExeclist CSB[%d]: 0x%08x [0x%08x in hwsp], context: %d [%d in hwsp]\n", - idx, - I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)), - hws[idx * 2], - I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx)), - hws[idx * 2 + 1]); - } - - rcu_read_lock(); - for (idx = 0; idx < execlists_num_ports(execlists); idx++) { - unsigned int count; - - rq = port_unpack(&execlists->port[idx], &count); - if (rq) { - seq_printf(m, "\t\tELSP[%d] count=%d, ", - idx, count); - print_request(m, rq, "rq: "); - } else { - seq_printf(m, "\t\tELSP[%d] idle\n", - idx); - } - } - rcu_read_unlock(); - - spin_lock_irq(&engine->timeline->lock); - for (rb = execlists->first; rb; rb = rb_next(rb)) { - struct i915_priolist *p = - rb_entry(rb, typeof(*p), node); - - list_for_each_entry(rq, &p->requests, - priotree.link) - print_request(m, rq, "\t\tQ "); - } - spin_unlock_irq(&engine->timeline->lock); - } else if (INTEL_GEN(dev_priv) > 6) { - seq_printf(m, "\tPP_DIR_BASE: 0x%08x\n", - I915_READ(RING_PP_DIR_BASE(engine))); - seq_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n", - I915_READ(RING_PP_DIR_BASE_READ(engine))); - seq_printf(m, "\tPP_DIR_DCLV: 0x%08x\n", - I915_READ(RING_PP_DIR_DCLV(engine))); - } - - spin_lock_irq(&b->rb_lock); - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = rb_entry(rb, typeof(*w), node); - - seq_printf(m, "\t%s [%d] waiting for %x\n", - w->tsk->comm, w->tsk->pid, w->seqno); - } - spin_unlock_irq(&b->rb_lock); - - seq_puts(m, "\n"); - } + p = drm_seq_file_printer(m); + for_each_engine(engine, dev_priv, id) + intel_engine_dump(engine, &p); intel_runtime_pm_put(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 807a7aafc089..a59b2a30ff5a 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -22,6 +22,8 @@ * */ +#include + #include "i915_drv.h" #include "intel_ringbuffer.h" #include "intel_lrc.h" @@ -1616,6 +1618,164 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine) } } +static void print_request(struct drm_printer *m, + struct drm_i915_gem_request *rq, + const char *prefix) +{ + drm_printf(m, "%s%x [%x:%x] prio=%d @ %dms: %s\n", prefix, + rq->global_seqno, rq->ctx->hw_id, rq->fence.seqno, + rq->priotree.priority, + jiffies_to_msecs(jiffies - rq->emitted_jiffies), + rq->timeline->common->name); +} + +void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct i915_gpu_error *error = &engine->i915->gpu_error; + struct drm_i915_private *dev_priv = engine->i915; + struct drm_i915_gem_request *rq; + struct rb_node *rb; + u64 addr; + + drm_printf(m, "%s\n", engine->name); + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", + intel_engine_get_seqno(engine), + intel_engine_last_submit(engine), + engine->hangcheck.seqno, + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), + engine->timeline->inflight_seqnos); + drm_printf(m, "\tReset count: %d\n", + i915_reset_engine_count(error, engine)); + + rcu_read_lock(); + + drm_printf(m, "\tRequests:\n"); + + rq = list_first_entry(&engine->timeline->requests, + struct drm_i915_gem_request, link); + if (&rq->link != &engine->timeline->requests) + print_request(m, rq, "\t\tfirst "); + + rq = list_last_entry(&engine->timeline->requests, + struct drm_i915_gem_request, link); + if (&rq->link != &engine->timeline->requests) + print_request(m, rq, "\t\tlast "); + + rq = i915_gem_find_active_request(engine); + if (rq) { + print_request(m, rq, "\t\tactive "); + drm_printf(m, + "\t\t[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]\n", + rq->head, rq->postfix, rq->tail, + rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, + rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u); + } + + drm_printf(m, "\tRING_START: 0x%08x [0x%08x]\n", + I915_READ(RING_START(engine->mmio_base)), + rq ? i915_ggtt_offset(rq->ring->vma) : 0); + drm_printf(m, "\tRING_HEAD: 0x%08x [0x%08x]\n", + I915_READ(RING_HEAD(engine->mmio_base)) & HEAD_ADDR, + rq ? rq->ring->head : 0); + drm_printf(m, "\tRING_TAIL: 0x%08x [0x%08x]\n", + I915_READ(RING_TAIL(engine->mmio_base)) & TAIL_ADDR, + rq ? rq->ring->tail : 0); + drm_printf(m, "\tRING_CTL: 0x%08x [%s]\n", + I915_READ(RING_CTL(engine->mmio_base)), + I915_READ(RING_CTL(engine->mmio_base)) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? "waiting" : ""); + + rcu_read_unlock(); + + addr = intel_engine_get_active_head(engine); + drm_printf(m, "\tACTHD: 0x%08x_%08x\n", + upper_32_bits(addr), lower_32_bits(addr)); + addr = intel_engine_get_last_batch_head(engine); + drm_printf(m, "\tBBADDR: 0x%08x_%08x\n", + upper_32_bits(addr), lower_32_bits(addr)); + + if (i915_modparams.enable_execlists) { + const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; + struct intel_engine_execlists * const execlists = &engine->execlists; + u32 ptr, read, write; + unsigned int idx; + + drm_printf(m, "\tExeclist status: 0x%08x %08x\n", + I915_READ(RING_EXECLIST_STATUS_LO(engine)), + I915_READ(RING_EXECLIST_STATUS_HI(engine))); + + ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine)); + read = GEN8_CSB_READ_PTR(ptr); + write = GEN8_CSB_WRITE_PTR(ptr); + drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s\n", + read, execlists->csb_head, + write, + intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)), + yesno(test_bit(ENGINE_IRQ_EXECLIST, + &engine->irq_posted))); + if (read >= GEN8_CSB_ENTRIES) + read = 0; + if (write >= GEN8_CSB_ENTRIES) + write = 0; + if (read > write) + write += GEN8_CSB_ENTRIES; + while (read < write) { + idx = ++read % GEN8_CSB_ENTRIES; + drm_printf(m, "\tExeclist CSB[%d]: 0x%08x [0x%08x in hwsp], context: %d [%d in hwsp]\n", + idx, + I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)), + hws[idx * 2], + I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx)), + hws[idx * 2 + 1]); + } + + rcu_read_lock(); + for (idx = 0; idx < execlists_num_ports(execlists); idx++) { + unsigned int count; + + rq = port_unpack(&execlists->port[idx], &count); + if (rq) { + drm_printf(m, "\t\tELSP[%d] count=%d, ", + idx, count); + print_request(m, rq, "rq: "); + } else { + drm_printf(m, "\t\tELSP[%d] idle\n", + idx); + } + } + rcu_read_unlock(); + + spin_lock_irq(&engine->timeline->lock); + for (rb = execlists->first; rb; rb = rb_next(rb)) { + struct i915_priolist *p = + rb_entry(rb, typeof(*p), node); + + list_for_each_entry(rq, &p->requests, + priotree.link) + print_request(m, rq, "\t\tQ "); + } + spin_unlock_irq(&engine->timeline->lock); + } else if (INTEL_GEN(dev_priv) > 6) { + drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", + I915_READ(RING_PP_DIR_BASE(engine))); + drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n", + I915_READ(RING_PP_DIR_BASE_READ(engine))); + drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n", + I915_READ(RING_PP_DIR_DCLV(engine))); + } + + spin_lock_irq(&b->rb_lock); + for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { + struct intel_wait *w = rb_entry(rb, typeof(*w), node); + + drm_printf(m, "\t%s [%d] waiting for %x\n", + w->tsk->comm, w->tsk->pid, w->seqno); + } + spin_unlock_irq(&b->rb_lock); + + drm_printf(m, "\n"); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_engine.c" #endif diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 0fedda17488c..17186f067408 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -7,6 +7,8 @@ #include "i915_gem_timeline.h" #include "i915_selftest.h" +struct drm_printer; + #define I915_CMD_HASH_ORDER 9 /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, @@ -839,4 +841,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915); bool intel_engine_can_store_dword(struct intel_engine_cs *engine); +void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *p); + #endif /* _INTEL_RINGBUFFER_H_ */ From 95a19ab4d7f8afbfca0564102d7d0bea2196c12c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Oct 2017 12:02:58 +0100 Subject: [PATCH 090/145] drm/i915/selftests: Pretty print engine state when requests fail to start During hangcheck testing, we try to execute requests following the GPU reset, and in particular want to try and debug when those fail. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171009110301.21705-2-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 08159b268893..7e1bdd88eda3 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -624,8 +624,11 @@ static int igt_wait_reset(void *arg) __i915_add_request(rq, true); if (!wait_for_hang(&h, rq)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + pr_err("Failed to start request %x, at %x\n", rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(rq->engine, &p); i915_reset(i915, 0); i915_gem_set_wedged(i915); @@ -716,8 +719,12 @@ static int igt_reset_queue(void *arg) __i915_add_request(rq, true); if (!wait_for_hang(&h, prev)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + pr_err("Failed to start request %x, at %x\n", prev->fence.seqno, hws_seqno(&h, prev)); + intel_engine_dump(rq->engine, &p); + i915_gem_request_put(rq); i915_gem_request_put(prev); @@ -818,8 +825,11 @@ static int igt_handle_error(void *arg) __i915_add_request(rq, true); if (!wait_for_hang(&h, rq)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + pr_err("Failed to start request %x, at %x\n", rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(rq->engine, &p); i915_reset(i915, 0); i915_gem_set_wedged(i915); From 1749d90ff6e0cd0fc97aac5b08782ca1ac9665cf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Oct 2017 12:02:59 +0100 Subject: [PATCH 091/145] drm/i915: Hold forcewake for the duration of reset+restart Resetting the engine requires us to hold the forcewake wakeref to prevent RC6 trying to happen in the middle of the reset sequence. The consequence of an unwanted RC6 event in the middle is that random state is then saved to the powercontext and restored later, which may overwrite the mmio state we need to preserve (e.g. PD_DIR_BASE in the legacy ringbuffer reset_ring_common()). This was noticed in the live_hangcheck selftests when Haswell would sporadically fail to restart during igt_reset_queue(). Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171009110301.21705-3-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 82a10036fb38..eba23c239aae 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2832,7 +2832,17 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request = NULL; - /* Prevent the signaler thread from updating the request + /* + * During the reset sequence, we must prevent the engine from + * entering RC6. As the context state is undefined until we restart + * the engine, if it does enter RC6 during the reset, the state + * written to the powercontext is undefined and so we may lose + * GPU state upon resume, i.e. fail to restart after a reset. + */ + intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); + + /* + * Prevent the signaler thread from updating the request * state (by calling dma_fence_signal) as we are processing * the reset. The write from the GPU of the seqno is * asynchronous and the signaler thread may see a different @@ -2843,7 +2853,8 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) */ kthread_park(engine->breadcrumbs.signaler); - /* Prevent request submission to the hardware until we have + /* + * Prevent request submission to the hardware until we have * completed the reset in i915_gem_reset_finish(). If a request * is completed by one engine, it may then queue a request * to a second via its engine->irq_tasklet *just* as we are @@ -3033,6 +3044,8 @@ void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) { tasklet_enable(&engine->execlists.irq_tasklet); kthread_unpark(engine->breadcrumbs.signaler); + + intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); } void i915_gem_reset_finish(struct drm_i915_private *dev_priv) From ff97d3ae6951101a85e5153cb128292d7119d15c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Oct 2017 12:03:00 +0100 Subject: [PATCH 092/145] drm/i915/selftests: Hold the rpm wakeref for the reset tests The lowlevel reset functions expect the caller to be holding the rpm wakeref for the device access across the reset. We were not explicitly doing this in the sefltest, so for simplicity acquire the wakeref for the duration of all subtests. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171009110301.21705-4-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 7e1bdd88eda3..71ce06680d66 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -878,9 +878,16 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_reset_queue), SUBTEST(igt_handle_error), }; + int err; if (!intel_has_gpu_reset(i915)) return 0; - return i915_subtests(tests, i915); + intel_runtime_pm_get(i915); + + err = i915_subtests(tests, i915); + + intel_runtime_pm_put(i915); + + return err; } From 67e6456485c7c04838a5dad720886726ae5590e0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Oct 2017 12:03:01 +0100 Subject: [PATCH 093/145] drm/i915: Provide an assert for when we expect forcewake to be held Add assert_forcewakes_active() (the complementary function to assert_forcewakes_inactive) that documents the requirement of a function for its callers to be holding the forcewake ref (i.e. the function is part of a sequence over which RC6 must be prevented). One such example is during ringbuffer reset, where RC6 must be held across the whole reinitialisation sequence. v2: Include debug information in the WARN so we know which fw domain is missing. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala #v1 Link: https://patchwork.freedesktop.org/patch/msgid/20171009110301.21705-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 11 ++++++++++- drivers/gpu/drm/i915/intel_uncore.c | 18 +++++++++++++++++- drivers/gpu/drm/i915/intel_uncore.h | 2 ++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 05c08b0bc172..4285f09ff8b8 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -579,7 +579,16 @@ out: static void reset_ring_common(struct intel_engine_cs *engine, struct drm_i915_gem_request *request) { - /* Try to restore the logical GPU state to match the continuation + /* + * RC6 must be prevented until the reset is complete and the engine + * reinitialised. If it occurs in the middle of this sequence, the + * state written to/loaded from the power context is ill-defined (e.g. + * the PP_BASE_DIR may be lost). + */ + assert_forcewakes_active(engine->i915, FORCEWAKE_ALL); + + /* + * Try to restore the logical GPU state to match the continuation * of the request queue. If we skip the context/PD restore, then * the next request may try to execute assuming that its context * is valid and loaded on the GPU and so may try to access invalid diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index b3c3f94fc7e4..983617b5b338 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -626,7 +626,23 @@ void assert_forcewakes_inactive(struct drm_i915_private *dev_priv) if (!dev_priv->uncore.funcs.force_wake_get) return; - WARN_ON(dev_priv->uncore.fw_domains_active); + WARN(dev_priv->uncore.fw_domains_active, + "Expected all fw_domains to be inactive, but %08x are still on\n", + dev_priv->uncore.fw_domains_active); +} + +void assert_forcewakes_active(struct drm_i915_private *dev_priv, + enum forcewake_domains fw_domains) +{ + if (!dev_priv->uncore.funcs.force_wake_get) + return; + + assert_rpm_wakelock_held(dev_priv); + + fw_domains &= dev_priv->uncore.fw_domains; + WARN(fw_domains & ~dev_priv->uncore.fw_domains_active, + "Expected %08x fw_domains to be active, but %08x are off\n", + fw_domains, fw_domains & ~dev_priv->uncore.fw_domains_active); } /* We give fast paths for the really cool registers */ diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 66eae2ce2f29..582771251b57 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -137,6 +137,8 @@ void intel_uncore_resume_early(struct drm_i915_private *dev_priv); u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv); void assert_forcewakes_inactive(struct drm_i915_private *dev_priv); +void assert_forcewakes_active(struct drm_i915_private *dev_priv, + enum forcewake_domains fw_domains); const char *intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id); enum forcewake_domains From b4563f595ed44514467cfb3566b8b6519f328354 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Oct 2017 09:43:55 +0100 Subject: [PATCH 094/145] drm/i915: Pin fence for iomap Acquire the fence register for the iomap in i915_vma_pin_iomap() on behalf of the caller. We probably want for the caller to specify whether the fence should be pinned for their usage, but at the moment all callers do want the associated fence, or none, so take it on their behalf. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171009084401.29090-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_vma.c | 35 ++++++++++++++++--- drivers/gpu/drm/i915/i915_vma.h | 7 +--- .../gpu/drm/i915/selftests/i915_gem_object.c | 8 ----- 3 files changed, 32 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 2bad88cc927b..595209a2f159 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -280,13 +280,16 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) { void __iomem *ptr; + int err; /* Access through the GTT requires the device to be awake. */ assert_rpm_wakelock_held(vma->vm->i915); lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) - return IO_ERR_PTR(-ENODEV); + if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { + err = -ENODEV; + goto err; + } GEM_BUG_ON(!i915_vma_is_ggtt(vma)); GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); @@ -296,14 +299,38 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable, vma->node.start, vma->node.size); - if (ptr == NULL) - return IO_ERR_PTR(-ENOMEM); + if (ptr == NULL) { + err = -ENOMEM; + goto err; + } vma->iomap = ptr; } __i915_vma_pin(vma); + + err = i915_vma_get_fence(vma); + if (err) + goto err_unpin; + + i915_vma_pin_fence(vma); + return ptr; + +err_unpin: + __i915_vma_unpin(vma); +err: + return IO_ERR_PTR(err); +} + +void i915_vma_unpin_iomap(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->obj->base.dev->struct_mutex); + + GEM_BUG_ON(vma->iomap == NULL); + + i915_vma_unpin_fence(vma); + i915_vma_unpin(vma); } void i915_vma_unpin_and_release(struct i915_vma **p_vma) diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index c59ba76613a3..864d84ab916d 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -322,12 +322,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); * Callers must hold the struct_mutex. This function is only valid to be * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap(). */ -static inline void i915_vma_unpin_iomap(struct i915_vma *vma) -{ - lockdep_assert_held(&vma->obj->base.dev->struct_mutex); - GEM_BUG_ON(vma->iomap == NULL); - i915_vma_unpin(vma); -} +void i915_vma_unpin_iomap(struct i915_vma *vma); static inline struct page *i915_vma_first_page(struct i915_vma *vma) { diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 8f011c447e41..1b8774a42e48 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -251,14 +251,6 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, return PTR_ERR(io); } - err = i915_vma_get_fence(vma); - if (err) { - pr_err("Failed to get fence for partial view: offset=%lu\n", - page); - i915_vma_unpin_iomap(vma); - return err; - } - iowrite32(page, io + n * PAGE_SIZE/sizeof(*io)); i915_vma_unpin_iomap(vma); From 3bd4073524fa1586435725ad45ff971a6c2b2537 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Oct 2017 09:43:56 +0100 Subject: [PATCH 095/145] drm/i915: Consolidate get_fence with pin_fence Following the pattern now used for obj->mm.pages, use just pin_fence and unpin_fence to control access to the fence registers. I.e. instead of calling get_fence(); pin_fence(), we now just need to call pin_fence(). This will make it easier to reduce the locking requirements around fence registers. Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171009084401.29090-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.h | 2 -- drivers/gpu/drm/i915/i915_gem.c | 3 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 +++---- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 33 ++++++++++++++++++---- drivers/gpu/drm/i915/i915_vma.c | 4 +-- drivers/gpu/drm/i915/i915_vma.h | 20 ++++++------- drivers/gpu/drm/i915/intel_display.c | 3 +- 7 files changed, 45 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 799a90abd81f..5d322cf490c4 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3759,8 +3759,6 @@ i915_vm_to_ppgtt(struct i915_address_space *vm) } /* i915_gem_fence_reg.c */ -int __must_check i915_vma_get_fence(struct i915_vma *vma); -int __must_check i915_vma_put_fence(struct i915_vma *vma); struct drm_i915_fence_reg * i915_reserve_fence(struct drm_i915_private *dev_priv); void i915_unreserve_fence(struct drm_i915_fence_reg *fence); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index eba23c239aae..37eba9da3fca 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1910,7 +1910,7 @@ int i915_gem_fault(struct vm_fault *vmf) if (ret) goto err_unpin; - ret = i915_vma_get_fence(vma); + ret = i915_vma_pin_fence(vma); if (ret) goto err_unpin; @@ -1926,6 +1926,7 @@ int i915_gem_fault(struct vm_fault *vmf) min_t(u64, vma->size, area->vm_end - area->vm_start), &ggtt->mappable); + i915_vma_unpin_fence(vma); err_unpin: __i915_vma_unpin(vma); err_unlock: diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d733c4d5a500..1df54e5fbb6e 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -367,12 +367,12 @@ eb_pin_vma(struct i915_execbuffer *eb, return false; if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) { - if (unlikely(i915_vma_get_fence(vma))) { + if (unlikely(i915_vma_pin_fence(vma))) { i915_vma_unpin(vma); return false; } - if (i915_vma_pin_fence(vma)) + if (vma->fence) exec_flags |= __EXEC_OBJECT_HAS_FENCE; } @@ -385,7 +385,7 @@ static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags) GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN)); if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE)) - i915_vma_unpin_fence(vma); + __i915_vma_unpin_fence(vma); __i915_vma_unpin(vma); } @@ -563,13 +563,13 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb, } if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) { - err = i915_vma_get_fence(vma); + err = i915_vma_pin_fence(vma); if (unlikely(err)) { i915_vma_unpin(vma); return err; } - if (i915_vma_pin_fence(vma)) + if (vma->fence) exec_flags |= __EXEC_OBJECT_HAS_FENCE; } diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 2783d63bd1ad..af824b8d73ea 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -280,8 +280,7 @@ static int fence_update(struct drm_i915_fence_reg *fence, * * 0 on success, negative error code on failure. */ -int -i915_vma_put_fence(struct i915_vma *vma) +int i915_vma_put_fence(struct i915_vma *vma) { struct drm_i915_fence_reg *fence = vma->fence; @@ -299,6 +298,8 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv) struct drm_i915_fence_reg *fence; list_for_each_entry(fence, &dev_priv->mm.fence_list, link) { + GEM_BUG_ON(fence->vma && fence->vma->fence != fence); + if (fence->pin_count) continue; @@ -313,7 +314,7 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv) } /** - * i915_vma_get_fence - set up fencing for a vma + * i915_vma_pin_fence - set up fencing for a vma * @vma: vma to map through a fence reg * * When mapping objects through the GTT, userspace wants to be able to write @@ -331,10 +332,11 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv) * 0 on success, negative error code on failure. */ int -i915_vma_get_fence(struct i915_vma *vma) +i915_vma_pin_fence(struct i915_vma *vma) { struct drm_i915_fence_reg *fence; struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL; + int err; /* Note that we revoke fences on runtime suspend. Therefore the user * must keep the device awake whilst using the fence. @@ -344,6 +346,8 @@ i915_vma_get_fence(struct i915_vma *vma) /* Just update our place in the LRU if our fence is getting reused. */ if (vma->fence) { fence = vma->fence; + GEM_BUG_ON(fence->vma != vma); + fence->pin_count++; if (!fence->dirty) { list_move_tail(&fence->link, &fence->i915->mm.fence_list); @@ -353,10 +357,25 @@ i915_vma_get_fence(struct i915_vma *vma) fence = fence_find(vma->vm->i915); if (IS_ERR(fence)) return PTR_ERR(fence); + + GEM_BUG_ON(fence->pin_count); + fence->pin_count++; } else return 0; - return fence_update(fence, set); + err = fence_update(fence, set); + if (err) + goto out_unpin; + + GEM_BUG_ON(fence->vma != set); + GEM_BUG_ON(vma->fence != (set ? fence : NULL)); + + if (set) + return 0; + +out_unpin: + fence->pin_count--; + return err; } /** @@ -429,6 +448,8 @@ void i915_gem_revoke_fences(struct drm_i915_private *dev_priv) for (i = 0; i < dev_priv->num_fence_regs; i++) { struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; + GEM_BUG_ON(fence->vma && fence->vma->fence != fence); + if (fence->vma) i915_gem_release_mmap(fence->vma->obj); } @@ -450,6 +471,8 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv) struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; struct i915_vma *vma = reg->vma; + GEM_BUG_ON(vma && vma->fence != reg); + /* * Commit delayed tiling changes if we have an object still * attached to the fence, otherwise just clear the fence. diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 595209a2f159..2b0083c34914 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -309,12 +309,10 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) __i915_vma_pin(vma); - err = i915_vma_get_fence(vma); + err = i915_vma_pin_fence(vma); if (err) goto err_unpin; - i915_vma_pin_fence(vma); - return ptr; err_unpin: diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 864d84ab916d..13d7ba7ee21e 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -345,15 +345,13 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma) * * True if the vma has a fence, false otherwise. */ -static inline bool -i915_vma_pin_fence(struct i915_vma *vma) +int i915_vma_pin_fence(struct i915_vma *vma); +int __must_check i915_vma_put_fence(struct i915_vma *vma); + +static inline void __i915_vma_unpin_fence(struct i915_vma *vma) { - lockdep_assert_held(&vma->obj->base.dev->struct_mutex); - if (vma->fence) { - vma->fence->pin_count++; - return true; - } else - return false; + GEM_BUG_ON(vma->fence->pin_count <= 0); + vma->fence->pin_count--; } /** @@ -368,10 +366,8 @@ static inline void i915_vma_unpin_fence(struct i915_vma *vma) { lockdep_assert_held(&vma->obj->base.dev->struct_mutex); - if (vma->fence) { - GEM_BUG_ON(vma->fence->pin_count <= 0); - vma->fence->pin_count--; - } + if (vma->fence) + __i915_vma_unpin_fence(vma); } #endif diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 15844bf92434..a1182eeee5af 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2219,8 +2219,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) * something and try to run the system in a "less than optimal" * mode that matches the user configuration. */ - if (i915_vma_get_fence(vma) == 0) - i915_vma_pin_fence(vma); + i915_vma_pin_fence(vma); } i915_vma_get(vma); From a65adaf8a834504a4acdc0deca7fa790771add8a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Oct 2017 09:43:57 +0100 Subject: [PATCH 096/145] drm/i915: Track user GTT faulting per-vma We don't wish to refault the entire object (other vma) when unbinding one partial vma. To do this track which vma have been faulted into the user's address space. v2: Use a local vma_offset to tidy up a multiline unmap_mapping_range(). Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171009084401.29090-3-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 50 +++++++++++++++-------- drivers/gpu/drm/i915/i915_gem_evict.c | 2 +- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 7 ++-- drivers/gpu/drm/i915/i915_gem_object.h | 1 + drivers/gpu/drm/i915/i915_vma.c | 28 ++++++++++++- drivers/gpu/drm/i915/i915_vma.h | 21 +++++++++- 7 files changed, 88 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9ec2bcd9a695..5b58d2b897c7 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -98,7 +98,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj) static char get_global_flag(struct drm_i915_gem_object *obj) { - return !list_empty(&obj->userfault_link) ? 'g' : ' '; + return obj->userfault_count ? 'g' : ' '; } static char get_pin_mapped_flag(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 37eba9da3fca..9cb8f85cbaad 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1914,18 +1914,22 @@ int i915_gem_fault(struct vm_fault *vmf) if (ret) goto err_unpin; - /* Mark as being mmapped into userspace for later revocation */ - assert_rpm_wakelock_held(dev_priv); - if (list_empty(&obj->userfault_link)) - list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); - /* Finally, remap it using the new GTT offset */ ret = remap_io_mapping(area, area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT, min_t(u64, vma->size, area->vm_end - area->vm_start), &ggtt->mappable); + if (ret) + goto err_fence; + /* Mark as being mmapped into userspace for later revocation */ + assert_rpm_wakelock_held(dev_priv); + if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) + list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); + GEM_BUG_ON(!obj->userfault_count); + +err_fence: i915_vma_unpin_fence(vma); err_unpin: __i915_vma_unpin(vma); @@ -1978,6 +1982,25 @@ err: return ret; } +static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) +{ + struct i915_vma *vma; + + GEM_BUG_ON(!obj->userfault_count); + + obj->userfault_count = 0; + list_del(&obj->userfault_link); + drm_vma_node_unmap(&obj->base.vma_node, + obj->base.dev->anon_inode->i_mapping); + + list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!i915_vma_is_ggtt(vma)) + break; + + i915_vma_unset_userfault(vma); + } +} + /** * i915_gem_release_mmap - remove physical page mappings * @obj: obj in question @@ -2008,12 +2031,10 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) lockdep_assert_held(&i915->drm.struct_mutex); intel_runtime_pm_get(i915); - if (list_empty(&obj->userfault_link)) + if (!obj->userfault_count) goto out; - list_del_init(&obj->userfault_link); - drm_vma_node_unmap(&obj->base.vma_node, - obj->base.dev->anon_inode->i_mapping); + __i915_gem_object_release_mmap(obj); /* Ensure that the CPU's PTE are revoked and there are not outstanding * memory transactions from userspace before we return. The TLB @@ -2041,11 +2062,8 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) */ list_for_each_entry_safe(obj, on, - &dev_priv->mm.userfault_list, userfault_link) { - list_del_init(&obj->userfault_link); - drm_vma_node_unmap(&obj->base.vma_node, - obj->base.dev->anon_inode->i_mapping); - } + &dev_priv->mm.userfault_list, userfault_link) + __i915_gem_object_release_mmap(obj); /* The fence will be lost when the device powers down. If any were * in use by hardware (i.e. they are pinned), we should not be powering @@ -2068,7 +2086,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) if (!reg->vma) continue; - GEM_BUG_ON(!list_empty(®->vma->obj->userfault_link)); + GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); reg->dirty = true; } } @@ -4276,7 +4294,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, mutex_init(&obj->mm.lock); INIT_LIST_HEAD(&obj->global_link); - INIT_LIST_HEAD(&obj->userfault_link); INIT_LIST_HEAD(&obj->vma_list); INIT_LIST_HEAD(&obj->lut_list); INIT_LIST_HEAD(&obj->batch_pool_link); @@ -4457,6 +4474,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, llist_for_each_entry_safe(obj, on, freed, freed) { GEM_BUG_ON(obj->bind_count); + GEM_BUG_ON(obj->userfault_count); GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); GEM_BUG_ON(!list_empty(&obj->lut_list)); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 4df039ef2ce3..933ee8ecfa54 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -82,7 +82,7 @@ mark_free(struct drm_mm_scan *scan, if (i915_vma_is_pinned(vma)) return false; - if (flags & PIN_NONFAULT && !list_empty(&vma->obj->userfault_link)) + if (flags & PIN_NONFAULT && i915_vma_has_userfault(vma)) return false; list_add(&vma->evict_link, unwind); diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index af824b8d73ea..012250f25255 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -240,7 +240,8 @@ static int fence_update(struct drm_i915_fence_reg *fence, /* Ensure that all userspace CPU access is completed before * stealing the fence. */ - i915_gem_release_mmap(fence->vma->obj); + GEM_BUG_ON(fence->vma->fence != fence); + i915_vma_revoke_mmap(fence->vma); fence->vma->fence = NULL; fence->vma = NULL; @@ -451,7 +452,7 @@ void i915_gem_revoke_fences(struct drm_i915_private *dev_priv) GEM_BUG_ON(fence->vma && fence->vma->fence != fence); if (fence->vma) - i915_gem_release_mmap(fence->vma->obj); + i915_vma_revoke_mmap(fence->vma); } } @@ -479,7 +480,7 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv) */ if (vma && !i915_gem_object_is_tiled(vma->obj)) { GEM_BUG_ON(!reg->dirty); - GEM_BUG_ON(!list_empty(&vma->obj->userfault_link)); + GEM_BUG_ON(i915_vma_has_userfault(vma)); list_move(®->link, &dev_priv->mm.fence_list); vma->fence = NULL; diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 956c911c2cbf..d67f1cbe842d 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -123,6 +123,7 @@ struct drm_i915_gem_object { /** * Whether the object is currently in the GGTT mmap. */ + unsigned int userfault_count; struct list_head userfault_link; struct list_head batch_pool_link; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 2b0083c34914..4dce2e0197d9 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -690,6 +690,30 @@ static void __i915_vma_iounmap(struct i915_vma *vma) vma->iomap = NULL; } +void i915_vma_revoke_mmap(struct i915_vma *vma) +{ + struct drm_vma_offset_node *node = &vma->obj->base.vma_node; + u64 vma_offset; + + lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + + if (!i915_vma_has_userfault(vma)) + return; + + GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); + GEM_BUG_ON(!vma->obj->userfault_count); + + vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT; + unmap_mapping_range(vma->vm->i915->drm.anon_inode->i_mapping, + drm_vma_node_offset_addr(node) + vma_offset, + vma->size, + 1); + + i915_vma_unset_userfault(vma); + if (!--vma->obj->userfault_count) + list_del(&vma->obj->userfault_link); +} + int i915_vma_unbind(struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; @@ -753,11 +777,13 @@ int i915_vma_unbind(struct i915_vma *vma) return ret; /* Force a pagefault for domain tracking on next user access */ - i915_gem_release_mmap(obj); + i915_vma_revoke_mmap(vma); __i915_vma_iounmap(vma); vma->flags &= ~I915_VMA_CAN_FENCE; } + GEM_BUG_ON(vma->fence); + GEM_BUG_ON(i915_vma_has_userfault(vma)); if (likely(!vma->vm->closed)) { trace_i915_vma_unbind(vma); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 13d7ba7ee21e..1e2bc9b3c3ac 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -66,7 +66,7 @@ struct i915_vma { * that exist in the ctx->handle_vmas LUT for this vma. */ unsigned int open_count; - unsigned int flags; + unsigned long flags; /** * How many users have pinned this object in GTT space. The following * users can each hold at most one reference: pwrite/pread, execbuffer @@ -88,6 +88,8 @@ struct i915_vma { #define I915_VMA_GGTT BIT(8) #define I915_VMA_CAN_FENCE BIT(9) #define I915_VMA_CLOSED BIT(10) +#define I915_VMA_USERFAULT_BIT 11 +#define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT) unsigned int active; struct i915_gem_active last_read[I915_NUM_ENGINES]; @@ -146,6 +148,22 @@ static inline bool i915_vma_is_closed(const struct i915_vma *vma) return vma->flags & I915_VMA_CLOSED; } +static inline bool i915_vma_set_userfault(struct i915_vma *vma) +{ + GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); + return __test_and_set_bit(I915_VMA_USERFAULT_BIT, &vma->flags); +} + +static inline void i915_vma_unset_userfault(struct i915_vma *vma) +{ + return __clear_bit(I915_VMA_USERFAULT_BIT, &vma->flags); +} + +static inline bool i915_vma_has_userfault(const struct i915_vma *vma) +{ + return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags); +} + static inline unsigned int i915_vma_get_active(const struct i915_vma *vma) { return vma->active; @@ -244,6 +262,7 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level); bool i915_vma_misplaced(const struct i915_vma *vma, u64 size, u64 alignment, u64 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); +void i915_vma_revoke_mmap(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); void i915_vma_unlink_ctx(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); From f34a93bbb33d988df36bb99f791f0bf8a7017041 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Oct 2017 09:43:58 +0100 Subject: [PATCH 097/145] drm/i915: Check PIN_NONFAULT overlaps in evict_for_node If the caller says that he doesn't want to evict any other faulting vma, honour that flag. The logic was used in evict_something, but not the more specific evict_for_node, now being used as a preliminary probe since commit 606fec956c0e ("drm/i915: Prefer random replacement before eviction search"). Fixes: 606fec956c0e ("drm/i915: Prefer random replacement before eviction search") Fixes: 821188778b9b ("drm/i915: Choose not to evict faultable objects from the GGTT") References: https://bugs.freedesktop.org/show_bug.cgi?id=102490 Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171009084401.29090-4-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_evict.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 933ee8ecfa54..a5a5b7e6daae 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -315,6 +315,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, break; } + if (flags & PIN_NONFAULT && i915_vma_has_userfault(vma)) { + ret = -ENOSPC; + break; + } + /* Overlap of objects in the same batch? */ if (i915_vma_is_pinned(vma)) { ret = -ENOSPC; From 3c755c5b56a79cf0a2681abf705cebfb10737db2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Oct 2017 09:43:59 +0100 Subject: [PATCH 098/145] drm/i915: Try a minimal attempt to insert the whole object for relocations As we have a lightweight fallback to insert a single page into the aperture, try to avoid any heavier evictions when attempting to insert the entire object. Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171009084401.29090-5-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1df54e5fbb6e..3d7190764f10 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -974,7 +974,9 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, return ERR_PTR(err); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | PIN_NONBLOCK); + PIN_MAPPABLE | + PIN_NONBLOCK | + PIN_NONFAULT); if (IS_ERR(vma)) { memset(&cache->node, 0, sizeof(cache->node)); err = drm_mm_insert_node_in_range From a3259ca9f85bc59720abc1b6dbbec6ce563e0877 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Oct 2017 09:44:00 +0100 Subject: [PATCH 099/145] drm/i915: Avoid evicting user fault mappable vma for pread/pwrite Both pread/pwrite GTT paths provide a fast fallback in case we cannot map the whole object at a time. Currently, we use the fallback for very large objects and for active objects that would require remapping, but we can also add active fault mappable objects to the list that we want to avoid evicting. The rationale is that such fault mappable objects are in active use and to evict requires tearing down the CPU PTE and forcing a page fault on the next access; more costly, and intefers with other processes, than our per-page GTT fallback. Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171009084401.29090-6-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9cb8f85cbaad..6f5c0d6da06a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1055,7 +1055,9 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, intel_runtime_pm_get(i915); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | PIN_NONBLOCK); + PIN_MAPPABLE | + PIN_NONFAULT | + PIN_NONBLOCK); if (!IS_ERR(vma)) { node.start = i915_ggtt_offset(vma); node.allocated = false; @@ -1239,7 +1241,9 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, intel_runtime_pm_get(i915); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | PIN_NONBLOCK); + PIN_MAPPABLE | + PIN_NONFAULT | + PIN_NONBLOCK); if (!IS_ERR(vma)) { node.start = i915_ggtt_offset(vma); node.allocated = false; From 43ae70d97c5195f48d903df31ecac4c5397b2f1e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Oct 2017 09:44:01 +0100 Subject: [PATCH 100/145] drm/i915: Early rejection of mappable GGTT pin attempts for large bo Currently, we reject attempting to pin a large bo into the mappable aperture, but only after trying to create the vma. Under debug kernels, repeatedly creating and freeing that vma for an oversized bo consumes one-third of the runtime for pwrite/pread tests as it is spent on kmalloc/kfree tracking. If we move the rejection to before creating that vma, we lose some accuracy of checking against the fence_size as opposed to object size, though the fence can never be smaller than the object. Note that the vma creation itself will reject an attempt to create a vma larger than the GTT so we can remove one redundant test. Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171009084401.29090-7-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 65 ++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6f5c0d6da06a..b43fae4b83e6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4036,42 +4036,47 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->base.dev->struct_mutex); + if (!view && flags & PIN_MAPPABLE) { + /* If the required space is larger than the available + * aperture, we will not able to find a slot for the + * object and unbinding the object now will be in + * vain. Worse, doing so may cause us to ping-pong + * the object in and out of the Global GTT and + * waste a lot of cycles under the mutex. + */ + if (obj->base.size > dev_priv->ggtt.mappable_end) + return ERR_PTR(-E2BIG); + + /* If NONBLOCK is set the caller is optimistically + * trying to cache the full object within the mappable + * aperture, and *must* have a fallback in place for + * situations where we cannot bind the object. We + * can be a little more lax here and use the fallback + * more often to avoid costly migrations of ourselves + * and other objects within the aperture. + * + * Half-the-aperture is used as a simple heuristic. + * More interesting would to do search for a free + * block prior to making the commitment to unbind. + * That caters for the self-harm case, and with a + * little more heuristics (e.g. NOFAULT, NOEVICT) + * we could try to minimise harm to others. + */ + if (flags & PIN_NONBLOCK && + obj->base.size > dev_priv->ggtt.mappable_end / 2) + return ERR_PTR(-ENOSPC); + } + vma = i915_vma_instance(obj, vm, view); if (unlikely(IS_ERR(vma))) return vma; if (i915_vma_misplaced(vma, size, alignment, flags)) { - if (flags & PIN_NONBLOCK && - (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))) - return ERR_PTR(-ENOSPC); + if (flags & PIN_NONBLOCK) { + if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) + return ERR_PTR(-ENOSPC); - if (flags & PIN_MAPPABLE) { - /* If the required space is larger than the available - * aperture, we will not able to find a slot for the - * object and unbinding the object now will be in - * vain. Worse, doing so may cause us to ping-pong - * the object in and out of the Global GTT and - * waste a lot of cycles under the mutex. - */ - if (vma->fence_size > dev_priv->ggtt.mappable_end) - return ERR_PTR(-E2BIG); - - /* If NONBLOCK is set the caller is optimistically - * trying to cache the full object within the mappable - * aperture, and *must* have a fallback in place for - * situations where we cannot bind the object. We - * can be a little more lax here and use the fallback - * more often to avoid costly migrations of ourselves - * and other objects within the aperture. - * - * Half-the-aperture is used as a simple heuristic. - * More interesting would to do search for a free - * block prior to making the commitment to unbind. - * That caters for the self-harm case, and with a - * little more heuristics (e.g. NOFAULT, NOEVICT) - * we could try to minimise harm to others. - */ - if (flags & PIN_NONBLOCK && + if (flags & PIN_MAPPABLE && vma->fence_size > dev_priv->ggtt.mappable_end / 2) return ERR_PTR(-ENOSPC); } From 84e8978e62fea661787a216e7fe9abac8f1e056e Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 9 Oct 2017 12:00:24 +0100 Subject: [PATCH 101/145] drm/i915: s/sg_mask/sg_page_sizes/ It's a little unclear what the sg_mask actually is, so prefer the more meaningful name of sg_page_sizes. Suggested-by: Joonas Lahtinen Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171009110024.29114-1-matthew.auld@intel.com Reviewed-by: Chris Wilson Reviewed-by: Joonas Lahtinen Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 28 +++++++++---------- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 6 ++-- drivers/gpu/drm/i915/i915_gem_internal.c | 8 +++--- drivers/gpu/drm/i915/i915_gem_userptr.c | 6 ++-- drivers/gpu/drm/i915/selftests/huge_pages.c | 18 ++++++------ drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 8 +++--- 7 files changed, 38 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5d322cf490c4..770305bdeabb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3537,7 +3537,7 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages, - unsigned int sg_mask); + unsigned int sg_page_sizes); int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj); static inline int __must_check diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b43fae4b83e6..e829e8c900e8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2333,7 +2333,7 @@ static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) struct page *page; unsigned long last_pfn = 0; /* suppress gcc warning */ unsigned int max_segment = i915_sg_segment_size(); - unsigned int sg_mask; + unsigned int sg_page_sizes; gfp_t noreclaim; int ret; @@ -2365,7 +2365,7 @@ rebuild_st: sg = st->sgl; st->nents = 0; - sg_mask = 0; + sg_page_sizes = 0; for (i = 0; i < page_count; i++) { const unsigned int shrink[] = { I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, @@ -2419,7 +2419,7 @@ rebuild_st: sg->length >= max_segment || page_to_pfn(page) != last_pfn + 1) { if (i) { - sg_mask |= sg->length; + sg_page_sizes |= sg->length; sg = sg_next(sg); } st->nents++; @@ -2433,7 +2433,7 @@ rebuild_st: WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); } if (sg) { /* loop terminated early; short sg table */ - sg_mask |= sg->length; + sg_page_sizes |= sg->length; sg_mark_end(sg); } @@ -2464,7 +2464,7 @@ rebuild_st: if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_do_bit_17_swizzle(obj, st); - __i915_gem_object_set_pages(obj, st, sg_mask); + __i915_gem_object_set_pages(obj, st, sg_page_sizes); return 0; @@ -2492,7 +2492,7 @@ err_pages: void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages, - unsigned int sg_mask) + unsigned int sg_page_sizes) { struct drm_i915_private *i915 = to_i915(obj->base.dev); unsigned long supported = INTEL_INFO(i915)->page_sizes; @@ -2512,16 +2512,16 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, obj->mm.quirked = true; } - GEM_BUG_ON(!sg_mask); - obj->mm.page_sizes.phys = sg_mask; + GEM_BUG_ON(!sg_page_sizes); + obj->mm.page_sizes.phys = sg_page_sizes; /* - * Calculate the supported page-sizes which fit into the given sg_mask. - * This will give us the page-sizes which we may be able to use - * opportunistically when later inserting into the GTT. For example if - * phys=2G, then in theory we should be able to use 1G, 2M, 64K or 4K - * pages, although in practice this will depend on a number of other - * factors. + * Calculate the supported page-sizes which fit into the given + * sg_page_sizes. This will give us the page-sizes which we may be able + * to use opportunistically when later inserting into the GTT. For + * example if phys=2G, then in theory we should be able to use 1G, 2M, + * 64K or 4K pages, although in practice this will depend on a number of + * other factors. */ obj->mm.page_sizes.sg = 0; for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index e542a9d80077..864439a214c8 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -259,16 +259,16 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev, static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) { struct sg_table *pages; - unsigned int sg_mask; + unsigned int sg_page_sizes; pages = dma_buf_map_attachment(obj->base.import_attach, DMA_BIDIRECTIONAL); if (IS_ERR(pages)) return PTR_ERR(pages); - sg_mask = i915_sg_page_sizes(pages->sgl); + sg_page_sizes = i915_sg_page_sizes(pages->sgl); - __i915_gem_object_set_pages(obj, pages, sg_mask); + __i915_gem_object_set_pages(obj, pages, sg_page_sizes); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index bdc23c4c8783..ee83ec838ee7 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -49,7 +49,7 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *st; struct scatterlist *sg; - unsigned int sg_mask; + unsigned int sg_page_sizes; unsigned int npages; int max_order; gfp_t gfp; @@ -88,7 +88,7 @@ create_st: sg = st->sgl; st->nents = 0; - sg_mask = 0; + sg_page_sizes = 0; do { int order = min(fls(npages) - 1, max_order); @@ -106,7 +106,7 @@ create_st: } while (1); sg_set_page(sg, page, PAGE_SIZE << order, 0); - sg_mask |= PAGE_SIZE << order; + sg_page_sizes |= PAGE_SIZE << order; st->nents++; npages -= 1 << order; @@ -135,7 +135,7 @@ create_st: */ obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, st, sg_mask); + __i915_gem_object_set_pages(obj, st, sg_page_sizes); return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 41e16e19c3f3..c36a84b070b6 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -405,7 +405,7 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, { unsigned int max_segment = i915_sg_segment_size(); struct sg_table *st; - unsigned int sg_mask; + unsigned int sg_page_sizes; int ret; st = kmalloc(sizeof(*st), GFP_KERNEL); @@ -435,9 +435,9 @@ alloc_table: return ERR_PTR(ret); } - sg_mask = i915_sg_page_sizes(st->sgl); + sg_page_sizes = i915_sg_page_sizes(st->sgl); - __i915_gem_object_set_pages(obj, st, sg_mask); + __i915_gem_object_set_pages(obj, st, sg_page_sizes); return st; } diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index b8495882e5b0..b8b9d0822199 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -68,7 +68,7 @@ static int get_huge_pages(struct drm_i915_gem_object *obj) unsigned int page_mask = obj->mm.page_mask; struct sg_table *st; struct scatterlist *sg; - unsigned int sg_mask; + unsigned int sg_page_sizes; u64 rem; st = kmalloc(sizeof(*st), GFP); @@ -83,7 +83,7 @@ static int get_huge_pages(struct drm_i915_gem_object *obj) rem = obj->base.size; sg = st->sgl; st->nents = 0; - sg_mask = 0; + sg_page_sizes = 0; /* * Our goal here is simple, we want to greedily fill the object from @@ -104,7 +104,7 @@ static int get_huge_pages(struct drm_i915_gem_object *obj) goto err; sg_set_page(sg, page, page_size, 0); - sg_mask |= page_size; + sg_page_sizes |= page_size; st->nents++; rem -= page_size; @@ -124,8 +124,8 @@ static int get_huge_pages(struct drm_i915_gem_object *obj) obj->mm.madv = I915_MADV_DONTNEED; - GEM_BUG_ON(sg_mask != obj->mm.page_mask); - __i915_gem_object_set_pages(obj, st, sg_mask); + GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask); + __i915_gem_object_set_pages(obj, st, sg_page_sizes); return 0; @@ -192,7 +192,7 @@ static int fake_get_huge_pages(struct drm_i915_gem_object *obj) const u64 max_len = rounddown_pow_of_two(UINT_MAX); struct sg_table *st; struct scatterlist *sg; - unsigned int sg_mask; + unsigned int sg_page_sizes; u64 rem; st = kmalloc(sizeof(*st), GFP); @@ -208,7 +208,7 @@ static int fake_get_huge_pages(struct drm_i915_gem_object *obj) rem = obj->base.size; sg = st->sgl; st->nents = 0; - sg_mask = 0; + sg_page_sizes = 0; do { unsigned int page_size = get_largest_page_size(i915, rem); unsigned int len = min(page_size * div_u64(rem, page_size), @@ -221,7 +221,7 @@ static int fake_get_huge_pages(struct drm_i915_gem_object *obj) sg_dma_len(sg) = len; sg_dma_address(sg) = page_size; - sg_mask |= len; + sg_page_sizes |= len; st->nents++; @@ -236,7 +236,7 @@ static int fake_get_huge_pages(struct drm_i915_gem_object *obj) obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, st, sg_mask); + __i915_gem_object_set_pages(obj, st, sg_page_sizes); return 0; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 883bc19e3aaf..9da0c9f99916 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -45,7 +45,7 @@ static int fake_get_pages(struct drm_i915_gem_object *obj) #define PFN_BIAS 0x1000 struct sg_table *pages; struct scatterlist *sg; - unsigned int sg_mask; + unsigned int sg_page_sizes; typeof(obj->base.size) rem; pages = kmalloc(sizeof(*pages), GFP); @@ -58,7 +58,7 @@ static int fake_get_pages(struct drm_i915_gem_object *obj) return -ENOMEM; } - sg_mask = 0; + sg_page_sizes = 0; rem = obj->base.size; for (sg = pages->sgl; sg; sg = sg_next(sg)) { unsigned long len = min_t(typeof(rem), rem, BIT(31)); @@ -67,7 +67,7 @@ static int fake_get_pages(struct drm_i915_gem_object *obj) sg_set_page(sg, pfn_to_page(PFN_BIAS), len, 0); sg_dma_address(sg) = page_to_phys(sg_page(sg)); sg_dma_len(sg) = len; - sg_mask |= len; + sg_page_sizes |= len; rem -= len; } @@ -75,7 +75,7 @@ static int fake_get_pages(struct drm_i915_gem_object *obj) obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, pages, sg_mask); + __i915_gem_object_set_pages(obj, pages, sg_page_sizes); return 0; #undef GFP From 2de3813880bf16deff8223add4d7e1872c7a9e77 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 22 Sep 2017 17:53:42 -0300 Subject: [PATCH 102/145] drm/i915: add the BXT and CNL DPLL registers to pipe_config_compare Looks like we were missing them. Signed-off-by: Paulo Zanoni Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20170922205343.16006-2-paulo.r.zanoni@intel.com --- drivers/gpu/drm/i915/intel_display.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a1182eeee5af..45d6e57fbe89 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11327,6 +11327,18 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, PIPE_CONF_CHECK_X(dpll_hw_state.ctrl1); PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr1); PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr2); + PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr0); + PIPE_CONF_CHECK_X(dpll_hw_state.ebb0); + PIPE_CONF_CHECK_X(dpll_hw_state.ebb4); + PIPE_CONF_CHECK_X(dpll_hw_state.pll0); + PIPE_CONF_CHECK_X(dpll_hw_state.pll1); + PIPE_CONF_CHECK_X(dpll_hw_state.pll2); + PIPE_CONF_CHECK_X(dpll_hw_state.pll3); + PIPE_CONF_CHECK_X(dpll_hw_state.pll6); + PIPE_CONF_CHECK_X(dpll_hw_state.pll8); + PIPE_CONF_CHECK_X(dpll_hw_state.pll9); + PIPE_CONF_CHECK_X(dpll_hw_state.pll10); + PIPE_CONF_CHECK_X(dpll_hw_state.pcsdw12); PIPE_CONF_CHECK_X(dsi_pll.ctrl); PIPE_CONF_CHECK_X(dsi_pll.div); From 0e005888b833ba65dcf2c7aa12958dad6b2b8618 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Thu, 5 Oct 2017 18:38:42 -0300 Subject: [PATCH 103/145] drm/i915: avoid division by zero on cnl_calc_wrpll_link If for some unexpected reason the registers all read zero it's better to WARN and return instead of dividing by zero and completely freezing the machine. I don't expect this to happen in the wild with the current code, but I accidentally triggered the division by zero while doing some debugging in an unusual environment. Cc: Rodrigo Vivi Signed-off-by: Paulo Zanoni Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171005213842.11423-2-paulo.r.zanoni@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index ce4b7335a78a..b307b6fe1ce3 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1220,6 +1220,9 @@ static int cnl_calc_wrpll_link(struct drm_i915_private *dev_priv, dco_freq += (((cfgcr0 & DPLL_CFGCR0_DCO_FRACTION_MASK) >> DPLL_CFGCR0_DCO_FRACTION_SHIFT) * ref_clock) / 0x8000; + if (WARN_ON(p0 == 0 || p1 == 0 || p2 == 0)) + return 0; + return dco_freq / (p0 * p1 * p2 * 5); } From 348e4058ebf53904e817eec7a1b25327143c2ed2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 28 Sep 2017 11:21:57 +0300 Subject: [PATCH 104/145] drm/i915/bios: parse DDI ports also for CHV for HDMI DDC pin and DP AUX channel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While technically CHV isn't DDI, we do look at the VBT based DDI port info for HDMI DDC pin and DP AUX channel. (We call these "alternate", but they're really just something that aren't platform defaults.) In commit e4ab73a13291 ("drm/i915: Respect alternate_ddc_pin for all DDI ports") Ville writes, "IIRC there may be CHV system that might actually need this." I'm not sure why there couldn't be even more platforms that need this, but start conservative, and parse the info for CHV in addition to DDI. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100553 Reported-by: Marek Wilczewski Cc: stable@vger.kernel.org Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/d0815082cb98487618429b62414854137049b888.1506586821.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_bios.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index b881ce8596ee..0a1159761ffd 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1240,7 +1240,7 @@ static void parse_ddi_ports(struct drm_i915_private *dev_priv, { enum port port; - if (!HAS_DDI(dev_priv)) + if (!HAS_DDI(dev_priv) && !IS_CHERRYVIEW(dev_priv)) return; if (!dev_priv->vbt.child_dev_num) From a87145ca56d14fc4b0bce68c18a5fecb01a09540 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 28 Sep 2017 11:21:58 +0300 Subject: [PATCH 105/145] drm/i915/bios: refactor parse general definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Early return on failures. Rename the variable for later merging with parse_device_mappings(). Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/785abb904a572752fec68d90d34efeb67774dc1f.1506586821.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_bios.c | 32 ++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 0a1159761ffd..22924612c680 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -435,21 +435,27 @@ static void parse_general_definitions(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) { - const struct bdb_general_definitions *general; + const struct bdb_general_definitions *defs; + u16 block_size; + int bus_pin; - general = find_section(bdb, BDB_GENERAL_DEFINITIONS); - if (general) { - u16 block_size = get_blocksize(general); - if (block_size >= sizeof(*general)) { - int bus_pin = general->crt_ddc_gmbus_pin; - DRM_DEBUG_KMS("crt_ddc_bus_pin: %d\n", bus_pin); - if (intel_gmbus_is_valid_pin(dev_priv, bus_pin)) - dev_priv->vbt.crt_ddc_pin = bus_pin; - } else { - DRM_DEBUG_KMS("BDB_GD too small (%d). Invalid.\n", - block_size); - } + defs = find_section(bdb, BDB_GENERAL_DEFINITIONS); + if (!defs) { + DRM_DEBUG_KMS("General definitions block not found\n"); + return; } + + block_size = get_blocksize(defs); + if (block_size < sizeof(*defs)) { + DRM_DEBUG_KMS("General definitions block too small (%u)\n", + block_size); + return; + } + + bus_pin = defs->crt_ddc_gmbus_pin; + DRM_DEBUG_KMS("crt_ddc_bus_pin: %d\n", bus_pin); + if (intel_gmbus_is_valid_pin(dev_priv, bus_pin)) + dev_priv->vbt.crt_ddc_pin = bus_pin; } static const struct child_device_config * From 2d936f1cf78fe84c05642545abe052488e7adad4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 28 Sep 2017 11:21:59 +0300 Subject: [PATCH 106/145] drm/i915/bios: don't initialize fields based on vbt version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In theory, these might clobber information for older VBT versions. We might have to store the BDB version for later parsing, but currently all code accessing these fields will only use them on newer platforms with new enough BDB versions. Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/0232d9cb258e8f83c4180cdb8aad1459a312ec2a.1506586821.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_bios.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 22924612c680..c91cdf131465 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1347,19 +1347,6 @@ parse_device_mapping(struct drm_i915_private *dev_priv, */ memcpy(child_dev_ptr, child, min_t(size_t, defs->child_dev_size, sizeof(*child))); - - /* - * copied full block, now init values when they are not - * available in current version - */ - if (bdb->version < 196) { - /* Set default values for bits added from v196 */ - child_dev_ptr->iboost = 0; - child_dev_ptr->hpd_invert = 0; - } - - if (bdb->version < 192) - child_dev_ptr->lspcon = 0; } return; } From 127704f5475a1913c284efa08376ec89ec513789 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 28 Sep 2017 11:22:00 +0300 Subject: [PATCH 107/145] drm/i915/bios: remove an unnecessary temp variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prepare for merging parse_device_mapping() into parse_general_definitions(). No functional changes. Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/f1c3621e2622f4debdfb4a2f5c1959845754ac04.1506586821.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_bios.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index c91cdf131465..2b72a7a520ce 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1265,7 +1265,6 @@ parse_device_mapping(struct drm_i915_private *dev_priv, { const struct bdb_general_definitions *defs; const struct child_device_config *child; - struct child_device_config *child_dev_ptr; int i, child_device_num, count; u8 expected_size; u16 block_size; @@ -1337,16 +1336,14 @@ parse_device_mapping(struct drm_i915_private *dev_priv, continue; } - child_dev_ptr = dev_priv->vbt.child_dev + count; - count++; - /* * Copy as much as we know (sizeof) and is available * (child_dev_size) of the child device. Accessing the data must * depend on VBT version. */ - memcpy(child_dev_ptr, child, + memcpy(dev_priv->vbt.child_dev + count, child, min_t(size_t, defs->child_dev_size, sizeof(*child))); + count++; } return; } From 53f6b2436e7ba3d5f9b3b6b2d2b42089d64690e8 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 28 Sep 2017 11:22:01 +0300 Subject: [PATCH 108/145] drm/i915/bios: cleanup comments and useless return MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/4a95fb9d23d980830e3158d3c57258e6539965ce.1506586821.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_bios.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 2b72a7a520ce..edc218e5eead 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1311,10 +1311,8 @@ parse_device_mapping(struct drm_i915_private *dev_priv, /* get the number of child device that is present */ for (i = 0; i < child_device_num; i++) { child = child_device_ptr(defs, i); - if (!child->device_type) { - /* skip the device block if device type is invalid */ + if (!child->device_type) continue; - } count++; } if (!count) { @@ -1331,10 +1329,8 @@ parse_device_mapping(struct drm_i915_private *dev_priv, count = 0; for (i = 0; i < child_device_num; i++) { child = child_device_ptr(defs, i); - if (!child->device_type) { - /* skip the device block if device type is invalid */ + if (!child->device_type) continue; - } /* * Copy as much as we know (sizeof) and is available @@ -1345,7 +1341,6 @@ parse_device_mapping(struct drm_i915_private *dev_priv, min_t(size_t, defs->child_dev_size, sizeof(*child))); count++; } - return; } /* Common defaults which may be overridden by VBT. */ From b3ca1f43b285dd74daa8ef87cce41bc50434a865 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 28 Sep 2017 11:22:02 +0300 Subject: [PATCH 109/145] drm/i915/bios: merge parse_device_mapping() into parse_general_definitions() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit They're both parsing the same block, and there's no need for them to be split. The former also benefits from the range checks in the latter. Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/64a292606ecbb0b8602e6c5523c5746573ec3944.1506586821.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_bios.c | 48 +++++++++++-------------------- 1 file changed, 16 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index edc218e5eead..aaad984e35e2 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -431,33 +431,6 @@ parse_general_features(struct drm_i915_private *dev_priv, dev_priv->vbt.fdi_rx_polarity_inverted); } -static void -parse_general_definitions(struct drm_i915_private *dev_priv, - const struct bdb_header *bdb) -{ - const struct bdb_general_definitions *defs; - u16 block_size; - int bus_pin; - - defs = find_section(bdb, BDB_GENERAL_DEFINITIONS); - if (!defs) { - DRM_DEBUG_KMS("General definitions block not found\n"); - return; - } - - block_size = get_blocksize(defs); - if (block_size < sizeof(*defs)) { - DRM_DEBUG_KMS("General definitions block too small (%u)\n", - block_size); - return; - } - - bus_pin = defs->crt_ddc_gmbus_pin; - DRM_DEBUG_KMS("crt_ddc_bus_pin: %d\n", bus_pin); - if (intel_gmbus_is_valid_pin(dev_priv, bus_pin)) - dev_priv->vbt.crt_ddc_pin = bus_pin; -} - static const struct child_device_config * child_device_ptr(const struct bdb_general_definitions *defs, int i) { @@ -1260,20 +1233,34 @@ static void parse_ddi_ports(struct drm_i915_private *dev_priv, } static void -parse_device_mapping(struct drm_i915_private *dev_priv, - const struct bdb_header *bdb) +parse_general_definitions(struct drm_i915_private *dev_priv, + const struct bdb_header *bdb) { const struct bdb_general_definitions *defs; const struct child_device_config *child; int i, child_device_num, count; u8 expected_size; u16 block_size; + int bus_pin; defs = find_section(bdb, BDB_GENERAL_DEFINITIONS); if (!defs) { DRM_DEBUG_KMS("No general definition block is found, no devices defined.\n"); return; } + + block_size = get_blocksize(defs); + if (block_size < sizeof(*defs)) { + DRM_DEBUG_KMS("General definitions block too small (%u)\n", + block_size); + return; + } + + bus_pin = defs->crt_ddc_gmbus_pin; + DRM_DEBUG_KMS("crt_ddc_bus_pin: %d\n", bus_pin); + if (intel_gmbus_is_valid_pin(dev_priv, bus_pin)) + dev_priv->vbt.crt_ddc_pin = bus_pin; + if (bdb->version < 106) { expected_size = 22; } else if (bdb->version < 111) { @@ -1303,8 +1290,6 @@ parse_device_mapping(struct drm_i915_private *dev_priv, return; } - /* get the block size of general definitions */ - block_size = get_blocksize(defs); /* get the number of child device */ child_device_num = (block_size - sizeof(*defs)) / defs->child_dev_size; count = 0; @@ -1522,7 +1507,6 @@ void intel_bios_init(struct drm_i915_private *dev_priv) parse_lfp_backlight(dev_priv, bdb); parse_sdvo_panel_data(dev_priv, bdb); parse_sdvo_device_mapping(dev_priv, bdb); - parse_device_mapping(dev_priv, bdb); parse_driver_features(dev_priv, bdb); parse_edp(dev_priv, bdb); parse_psr(dev_priv, bdb); From 0ebdabe6126498f06cd1885dfbf658c46317041f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 28 Sep 2017 11:22:03 +0300 Subject: [PATCH 110/145] drm/i915/bios: parse SDVO device mapping from pre-parsed child devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We parse and store the child devices in parse_general_definitions(). There is no need to parse the VBT block again for SDVO device mapping. Do the same as we do in parse_ddi_ports(). We no longer have access to child device size at this stage, but we also don't need to worry about reading past the child device anymore. Instead of a child device size check, do a mild optimization by limiting the parsing to gens 3 through 7. Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/c918d4173dd38a165295f1270cb16c2c01bd8cd1.1506586821.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_bios.c | 39 ++++++++++--------------------- 1 file changed, 12 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index aaad984e35e2..4d26fcc5cabb 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -442,37 +442,21 @@ parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) { struct sdvo_device_mapping *mapping; - const struct bdb_general_definitions *defs; const struct child_device_config *child; - int i, child_device_num, count; - u16 block_size; - - defs = find_section(bdb, BDB_GENERAL_DEFINITIONS); - if (!defs) { - DRM_DEBUG_KMS("No general definition block is found, unable to construct sdvo mapping.\n"); - return; - } + int i, count = 0; /* - * Only parse SDVO mappings when the general definitions block child - * device size matches that of the *legacy* child device config - * struct. Thus, SDVO mapping will be skipped for newer VBT. + * Only parse SDVO mappings on gens that could have SDVO. This isn't + * accurate and doesn't have to be, as long as it's not too strict. */ - if (defs->child_dev_size != LEGACY_CHILD_DEVICE_CONFIG_SIZE) { - DRM_DEBUG_KMS("Unsupported child device size for SDVO mapping.\n"); + if (!IS_GEN(dev_priv, 3, 7)) { + DRM_DEBUG_KMS("Skipping SDVO device mapping\n"); return; } - /* get the block size of general definitions */ - block_size = get_blocksize(defs); - /* get the number of child device */ - child_device_num = (block_size - sizeof(*defs)) / defs->child_dev_size; - count = 0; - for (i = 0; i < child_device_num; i++) { - child = child_device_ptr(defs, i); - if (!child->device_type) { - /* skip the device block if device type is invalid */ - continue; - } + + for (i = 0, count = 0; i < dev_priv->vbt.child_dev_num; i++) { + child = dev_priv->vbt.child_dev + i; + if (child->slave_addr != SLAVE_ADDR1 && child->slave_addr != SLAVE_ADDR2) { /* @@ -523,7 +507,6 @@ parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, /* No SDVO device info is found */ DRM_DEBUG_KMS("No SDVO device info is found in VBT\n"); } - return; } static void @@ -1506,12 +1489,14 @@ void intel_bios_init(struct drm_i915_private *dev_priv) parse_lfp_panel_data(dev_priv, bdb); parse_lfp_backlight(dev_priv, bdb); parse_sdvo_panel_data(dev_priv, bdb); - parse_sdvo_device_mapping(dev_priv, bdb); parse_driver_features(dev_priv, bdb); parse_edp(dev_priv, bdb); parse_psr(dev_priv, bdb); parse_mipi_config(dev_priv, bdb); parse_mipi_sequence(dev_priv, bdb); + + /* Further processing on pre-parsed data */ + parse_sdvo_device_mapping(dev_priv, bdb); parse_ddi_ports(dev_priv, bdb); out: From 0ead5f81d4200b63cf92be867bfbd6708056e4a8 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 28 Sep 2017 11:22:04 +0300 Subject: [PATCH 111/145] drm/i915/bios: don't pass bdb to parsers that don't parse VBT directly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hint that you're not supposed to look at VBT in these functions. Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/b82c326be8c796a70bdc2bd1c479bbb6159f5cb0.1506586821.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_bios.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 4d26fcc5cabb..9d5b42953436 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -438,8 +438,7 @@ child_device_ptr(const struct bdb_general_definitions *defs, int i) } static void -parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, - const struct bdb_header *bdb) +parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, u8 bdb_version) { struct sdvo_device_mapping *mapping; const struct child_device_config *child; @@ -1073,7 +1072,7 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv, } static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, - const struct bdb_header *bdb) + u8 bdb_version) { struct child_device_config *it, *child = NULL; struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port]; @@ -1177,7 +1176,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, sanitize_aux_ch(dev_priv, port); } - if (bdb->version >= 158) { + if (bdb_version >= 158) { /* The VBT HDMI level shift values match the table we have. */ hdmi_level_shift = child->hdmi_level_shifter_value; DRM_DEBUG_KMS("VBT HDMI level shift for port %c: %d\n", @@ -1187,7 +1186,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, } /* Parse the I_boost config for SKL and above */ - if (bdb->version >= 196 && child->iboost) { + if (bdb_version >= 196 && child->iboost) { info->dp_boost_level = translate_iboost(child->dp_iboost_level); DRM_DEBUG_KMS("VBT (e)DP boost level for port %c: %d\n", port_name(port), info->dp_boost_level); @@ -1197,8 +1196,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, } } -static void parse_ddi_ports(struct drm_i915_private *dev_priv, - const struct bdb_header *bdb) +static void parse_ddi_ports(struct drm_i915_private *dev_priv, u8 bdb_version) { enum port port; @@ -1208,11 +1206,11 @@ static void parse_ddi_ports(struct drm_i915_private *dev_priv, if (!dev_priv->vbt.child_dev_num) return; - if (bdb->version < 155) + if (bdb_version < 155) return; for (port = PORT_A; port < I915_MAX_PORTS; port++) - parse_ddi_port(dev_priv, port, bdb); + parse_ddi_port(dev_priv, port, bdb_version); } static void @@ -1496,8 +1494,8 @@ void intel_bios_init(struct drm_i915_private *dev_priv) parse_mipi_sequence(dev_priv, bdb); /* Further processing on pre-parsed data */ - parse_sdvo_device_mapping(dev_priv, bdb); - parse_ddi_ports(dev_priv, bdb); + parse_sdvo_device_mapping(dev_priv, bdb->version); + parse_ddi_ports(dev_priv, bdb->version); out: if (!vbt) { From 7741b547b6e000b08e20667bb3bef22e1a362661 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 9 Oct 2017 18:44:00 +0200 Subject: [PATCH 112/145] drm/i915: Preallocate our mmu notifier workequeu to unbreak cpu hotplug deadlock 4.14-rc1 gained the fancy new cross-release support in lockdep, which seems to have uncovered a few more rules about what is allowed and isn't. This one here seems to indicate that allocating a work-queue while holding mmap_sem is a no-go, so let's try to preallocate it. Of course another way to break this chain would be somewhere in the cpu hotplug code, since this isn't the only trace we're finding now which goes through msr_create_device. Full lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 4.14.0-rc1-CI-CI_DRM_3118+ #1 Tainted: G U ------------------------------------------------------ prime_mmap/1551 is trying to acquire lock: (cpu_hotplug_lock.rw_sem){++++}, at: [] apply_workqueue_attrs+0x17/0x50 but task is already holding lock: (&dev_priv->mm_lock){+.+.}, at: [] i915_gem_userptr_init__mmu_notifier+0x14a/0x270 [i915] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #6 (&dev_priv->mm_lock){+.+.}: __lock_acquire+0x1420/0x15e0 lock_acquire+0xb0/0x200 __mutex_lock+0x86/0x9b0 mutex_lock_nested+0x1b/0x20 i915_gem_userptr_init__mmu_notifier+0x14a/0x270 [i915] i915_gem_userptr_ioctl+0x222/0x2c0 [i915] drm_ioctl_kernel+0x69/0xb0 drm_ioctl+0x2f9/0x3d0 do_vfs_ioctl+0x94/0x670 SyS_ioctl+0x41/0x70 entry_SYSCALL_64_fastpath+0x1c/0xb1 -> #5 (&mm->mmap_sem){++++}: __lock_acquire+0x1420/0x15e0 lock_acquire+0xb0/0x200 __might_fault+0x68/0x90 _copy_to_user+0x23/0x70 filldir+0xa5/0x120 dcache_readdir+0xf9/0x170 iterate_dir+0x69/0x1a0 SyS_getdents+0xa5/0x140 entry_SYSCALL_64_fastpath+0x1c/0xb1 -> #4 (&sb->s_type->i_mutex_key#5){++++}: down_write+0x3b/0x70 handle_create+0xcb/0x1e0 devtmpfsd+0x139/0x180 kthread+0x152/0x190 ret_from_fork+0x27/0x40 -> #3 ((complete)&req.done){+.+.}: __lock_acquire+0x1420/0x15e0 lock_acquire+0xb0/0x200 wait_for_common+0x58/0x210 wait_for_completion+0x1d/0x20 devtmpfs_create_node+0x13d/0x160 device_add+0x5eb/0x620 device_create_groups_vargs+0xe0/0xf0 device_create+0x3a/0x40 msr_device_create+0x2b/0x40 cpuhp_invoke_callback+0xa3/0x840 cpuhp_thread_fun+0x7a/0x150 smpboot_thread_fn+0x18a/0x280 kthread+0x152/0x190 ret_from_fork+0x27/0x40 -> #2 (cpuhp_state){+.+.}: __lock_acquire+0x1420/0x15e0 lock_acquire+0xb0/0x200 cpuhp_issue_call+0x10b/0x170 __cpuhp_setup_state_cpuslocked+0x134/0x2a0 __cpuhp_setup_state+0x46/0x60 page_writeback_init+0x43/0x67 pagecache_init+0x3d/0x42 start_kernel+0x3a8/0x3fc x86_64_start_reservations+0x2a/0x2c x86_64_start_kernel+0x6d/0x70 verify_cpu+0x0/0xfb -> #1 (cpuhp_state_mutex){+.+.}: __lock_acquire+0x1420/0x15e0 lock_acquire+0xb0/0x200 __mutex_lock+0x86/0x9b0 mutex_lock_nested+0x1b/0x20 __cpuhp_setup_state_cpuslocked+0x52/0x2a0 __cpuhp_setup_state+0x46/0x60 page_alloc_init+0x28/0x30 start_kernel+0x145/0x3fc x86_64_start_reservations+0x2a/0x2c x86_64_start_kernel+0x6d/0x70 verify_cpu+0x0/0xfb -> #0 (cpu_hotplug_lock.rw_sem){++++}: check_prev_add+0x430/0x840 __lock_acquire+0x1420/0x15e0 lock_acquire+0xb0/0x200 cpus_read_lock+0x3d/0xb0 apply_workqueue_attrs+0x17/0x50 __alloc_workqueue_key+0x1d8/0x4d9 i915_gem_userptr_init__mmu_notifier+0x1fb/0x270 [i915] i915_gem_userptr_ioctl+0x222/0x2c0 [i915] drm_ioctl_kernel+0x69/0xb0 drm_ioctl+0x2f9/0x3d0 do_vfs_ioctl+0x94/0x670 SyS_ioctl+0x41/0x70 entry_SYSCALL_64_fastpath+0x1c/0xb1 other info that might help us debug this: Chain exists of: cpu_hotplug_lock.rw_sem --> &mm->mmap_sem --> &dev_priv->mm_lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&dev_priv->mm_lock); lock(&mm->mmap_sem); lock(&dev_priv->mm_lock); lock(cpu_hotplug_lock.rw_sem); *** DEADLOCK *** 2 locks held by prime_mmap/1551: #0: (&mm->mmap_sem){++++}, at: [] i915_gem_userptr_init__mmu_notifier+0x138/0x270 [i915] #1: (&dev_priv->mm_lock){+.+.}, at: [] i915_gem_userptr_init__mmu_notifier+0x14a/0x270 [i915] stack backtrace: CPU: 4 PID: 1551 Comm: prime_mmap Tainted: G U 4.14.0-rc1-CI-CI_DRM_3118+ #1 Hardware name: Dell Inc. XPS 8300 /0Y2MRG, BIOS A06 10/17/2011 Call Trace: dump_stack+0x68/0x9f print_circular_bug+0x235/0x3c0 ? lockdep_init_map_crosslock+0x20/0x20 check_prev_add+0x430/0x840 __lock_acquire+0x1420/0x15e0 ? __lock_acquire+0x1420/0x15e0 ? lockdep_init_map_crosslock+0x20/0x20 lock_acquire+0xb0/0x200 ? apply_workqueue_attrs+0x17/0x50 cpus_read_lock+0x3d/0xb0 ? apply_workqueue_attrs+0x17/0x50 apply_workqueue_attrs+0x17/0x50 __alloc_workqueue_key+0x1d8/0x4d9 ? __lockdep_init_map+0x57/0x1c0 i915_gem_userptr_init__mmu_notifier+0x1fb/0x270 [i915] i915_gem_userptr_ioctl+0x222/0x2c0 [i915] ? i915_gem_userptr_release+0x140/0x140 [i915] drm_ioctl_kernel+0x69/0xb0 drm_ioctl+0x2f9/0x3d0 ? i915_gem_userptr_release+0x140/0x140 [i915] ? __do_page_fault+0x2a4/0x570 do_vfs_ioctl+0x94/0x670 ? entry_SYSCALL_64_fastpath+0x5/0xb1 ? __this_cpu_preempt_check+0x13/0x20 ? trace_hardirqs_on_caller+0xe3/0x1b0 SyS_ioctl+0x41/0x70 entry_SYSCALL_64_fastpath+0x1c/0xb1 RIP: 0033:0x7fbb83c39587 RSP: 002b:00007fff188dc228 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: ffffffff81492963 RCX: 00007fbb83c39587 RDX: 00007fff188dc260 RSI: 00000000c0186473 RDI: 0000000000000003 RBP: ffffc90001487f88 R08: 0000000000000000 R09: 00007fff188dc2ac R10: 00007fbb83efcb58 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000003 R14: 00000000c0186473 R15: 00007fff188dc2ac ? __this_cpu_preempt_check+0x13/0x20 Note that this also has the minor benefit of slightly reducing the critical section where we hold mmap_sem. v2: Set ret correctly when we raced with another thread. v3: Use Chris' diff. Attach the right lockdep splat. v4: Repaint in Tvrtko's colors (aka don't report ENOMEM if we race and some other thread managed to not also get an ENOMEM and successfully install the mmu notifier. Note that the kernel guarantees that small allocations succeed, so this never actually happens). Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Sasha Levin Cc: Marta Lofstedt Cc: Tejun Heo References: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_3180/shard-hsw3/igt@prime_mmap@test_userptr.html Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102939 Reviewed-by: Tvrtko Ursulin Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20171009164401.16035-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/i915_gem_userptr.c | 38 +++++++++++++++---------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index c36a84b070b6..4d712a4db63b 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -164,7 +164,6 @@ static struct i915_mmu_notifier * i915_mmu_notifier_create(struct mm_struct *mm) { struct i915_mmu_notifier *mn; - int ret; mn = kmalloc(sizeof(*mn), GFP_KERNEL); if (mn == NULL) @@ -179,14 +178,6 @@ i915_mmu_notifier_create(struct mm_struct *mm) return ERR_PTR(-ENOMEM); } - /* Protected by mmap_sem (write-lock) */ - ret = __mmu_notifier_register(&mn->mn, mm); - if (ret) { - destroy_workqueue(mn->wq); - kfree(mn); - return ERR_PTR(ret); - } - return mn; } @@ -210,23 +201,40 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) static struct i915_mmu_notifier * i915_mmu_notifier_find(struct i915_mm_struct *mm) { - struct i915_mmu_notifier *mn = mm->mn; + struct i915_mmu_notifier *mn; + int err = 0; mn = mm->mn; if (mn) return mn; + mn = i915_mmu_notifier_create(mm->mm); + if (IS_ERR(mn)) + err = PTR_ERR(mn); + down_write(&mm->mm->mmap_sem); mutex_lock(&mm->i915->mm_lock); - if ((mn = mm->mn) == NULL) { - mn = i915_mmu_notifier_create(mm->mm); - if (!IS_ERR(mn)) - mm->mn = mn; + if (mm->mn == NULL && !err) { + /* Protected by mmap_sem (write-lock) */ + err = __mmu_notifier_register(&mn->mn, mm->mm); + if (!err) { + /* Protected by mm_lock */ + mm->mn = fetch_and_zero(&mn); + } + } else { + /* someone else raced and successfully installed the mmu + * notifier, we can cancel our own errors */ + err = 0; } mutex_unlock(&mm->i915->mm_lock); up_write(&mm->mm->mmap_sem); - return mn; + if (mn) { + destroy_workqueue(mn->wq); + kfree(mn); + } + + return err ? ERR_PTR(err) : mm->mn; } static int From dc2279e169f0676b969014c02689c4bbb069ad01 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 10 Oct 2017 14:48:57 +0300 Subject: [PATCH 113/145] drm/i915: Use execlists_num_ports instead of size of array There is function to tell how many ports we have, so use it. We still have direct relationship with array size and port count, so no harm was done. Fixes: 76e70087d360 ("drm/i915: Make execlist port count variable") Cc: Mika Kuoppala Cc: Chris Wilson Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171010114857.13108-1-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/i915_guc_submission.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 31381a327347..f15de4dcefde 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -500,7 +500,7 @@ static void i915_guc_submit(struct intel_engine_cs *engine) const unsigned int engine_id = engine->id; unsigned int n; - for (n = 0; n < ARRAY_SIZE(execlists->port); n++) { + for (n = 0; n < execlists_num_ports(execlists); n++) { struct drm_i915_gem_request *rq; unsigned int count; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 721432ddf403..fbfcf88d7fe3 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -691,7 +691,7 @@ static void execlist_cancel_port_requests(struct intel_engine_execlists *execlists) { struct execlist_port *port = execlists->port; - unsigned int num_ports = ARRAY_SIZE(execlists->port); + unsigned int num_ports = execlists_num_ports(execlists); while (num_ports-- && port_isset(port)) { struct drm_i915_gem_request *rq = port_request(port); From 69208c9e1e938cd115fdafdc502b5a7aa99d8d0b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 10 Oct 2017 11:18:16 +0200 Subject: [PATCH 114/145] drm/i915: Increase atomic update vblank evasion time with lockdep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All our mmio writes take forever with lockdep due to the constant lock acquire&dropping we do. Ville has some patches to only acquire the mmio spinlocks once instead for every single mmio, but those aren't ready yet. As an interim solution just extend our budget slightly when lockdep is enabled, to avoid the rare and sporadic noise in CI. v2: I forgot to add the FIXME comment ... Cc: Ville Syrjala Acked-by: Ville Syrjälä References: https://bugs.freedesktop.org/show_bug.cgi?id=103169 References: https://bugs.freedesktop.org/show_bug.cgi?id=103124 References: https://bugs.freedesktop.org/show_bug.cgi?id=102403 References: https://bugs.freedesktop.org/show_bug.cgi?id=103020 References: https://bugs.freedesktop.org/show_bug.cgi?id=103019 References: https://bugs.freedesktop.org/show_bug.cgi?id=102723 References: https://bugs.freedesktop.org/show_bug.cgi?id=102544 References: https://bugs.freedesktop.org/show_bug.cgi?id=103180 Signed-off-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20171010091816.26898-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/intel_sprite.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index b0d6e3e28d07..f29369622d2c 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -66,7 +66,13 @@ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, 1000 * adjusted_mode->crtc_htotal); } +/* FIXME: We should instead only take spinlocks once for the entire update + * instead of once per mmio. */ +#if IS_ENABLED(CONFIG_PROVE_LOCKING) +#define VBLANK_EVASION_TIME_US 250 +#else #define VBLANK_EVASION_TIME_US 100 +#endif /** * intel_pipe_update_start() - start update of a set of display registers From ecf837d98e375d18f4fd8df6f744ca9ca9b5957d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 10 Oct 2017 15:55:56 +0300 Subject: [PATCH 115/145] drm/i915: Use enum pipe for PCH transcoders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One more place where we've failed to switch to enum pipe when talking about PCH transcoders. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171010125556.25086-1-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 45d6e57fbe89..57d886cece16 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14758,10 +14758,10 @@ static struct intel_connector *intel_encoder_find_connector(struct intel_encoder } static bool has_pch_trancoder(struct drm_i915_private *dev_priv, - enum transcoder pch_transcoder) + enum pipe pch_transcoder) { return HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) || - (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A); + (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == PIPE_A); } static void intel_sanitize_crtc(struct intel_crtc *crtc, @@ -14844,7 +14844,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, * PCH transcoders B and C would prevent enabling the south * error interrupt (see cpt_can_enable_serr_int()). */ - if (has_pch_trancoder(dev_priv, (enum transcoder)crtc->pipe)) + if (has_pch_trancoder(dev_priv, crtc->pipe)) crtc->pch_fifo_underrun_disabled = true; } } From dfa311f0d8e2bbd4ba97344ac4a2eb6e810fc0e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 13 Sep 2017 17:08:54 +0300 Subject: [PATCH 116/145] drm/i915: Parametrize CBR_DPLLBMD_PIPE defines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply a bit of polish by parametrizing the CBR_DPLLBMD_PIPE defines. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20170913140900.6972-3-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_reg.h | 3 +-- drivers/gpu/drm/i915/intel_display.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 50e65c98ca6c..1ebbbc9ad9e7 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5684,8 +5684,7 @@ enum { #define CBR_PWM_CLOCK_MUX_SELECT (1<<30) #define CBR4_VLV _MMIO(VLV_DISPLAY_BASE + 0x70450) -#define CBR_DPLLBMD_PIPE_C (1<<29) -#define CBR_DPLLBMD_PIPE_B (1<<18) +#define CBR_DPLLBMD_PIPE(pipe) (1<<(7+(pipe)*11)) /* pipes B and C */ /* FIFO watermark sizes etc */ #define G4X_FIFO_LINE_SIZE 64 diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 57d886cece16..c96d02ce1f2f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1539,7 +1539,7 @@ static void chv_enable_pll(struct intel_crtc *crtc, * DPLLCMD is AWOL. Use chicken bits to propagate * the value from DPLLBMD to either pipe B or C. */ - I915_WRITE(CBR4_VLV, pipe == PIPE_B ? CBR_DPLLBMD_PIPE_B : CBR_DPLLBMD_PIPE_C); + I915_WRITE(CBR4_VLV, CBR_DPLLBMD_PIPE(pipe)); I915_WRITE(DPLL_MD(PIPE_B), pipe_config->dpll_hw_state.dpll_md); I915_WRITE(CBR4_VLV, 0); dev_priv->chv_dpll_md[pipe] = pipe_config->dpll_hw_state.dpll_md; From 939994da2f63cb130ddeb131736254ad203662cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 13 Sep 2017 17:08:56 +0300 Subject: [PATCH 117/145] drm/i915: Pass crtc state to i9xx_enable_pll() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass the crtc state to i9xx_enable_pll() and use it rather than crtc->config. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20170913140900.6972-5-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c96d02ce1f2f..99a2052864a1 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1568,11 +1568,12 @@ static int intel_num_dvo_pipes(struct drm_i915_private *dev_priv) return count; } -static void i9xx_enable_pll(struct intel_crtc *crtc) +static void i9xx_enable_pll(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); i915_reg_t reg = DPLL(crtc->pipe); - u32 dpll = crtc->config->dpll_hw_state.dpll; + u32 dpll = crtc_state->dpll_hw_state.dpll; int i; assert_pipe_disabled(dev_priv, crtc->pipe); @@ -1609,7 +1610,7 @@ static void i9xx_enable_pll(struct intel_crtc *crtc) if (INTEL_GEN(dev_priv) >= 4) { I915_WRITE(DPLL_MD(crtc->pipe), - crtc->config->dpll_hw_state.dpll_md); + crtc_state->dpll_hw_state.dpll_md); } else { /* The pixel multiplier can only be updated once the * DPLL is enabled and the clocks are stable. @@ -5895,7 +5896,7 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config, intel_encoders_pre_enable(crtc, pipe_config, old_state); - i9xx_enable_pll(intel_crtc); + i9xx_enable_pll(intel_crtc, pipe_config); i9xx_pfit_enable(intel_crtc); From db37d8f39c1ea5a7efb410fda94e3bb9d5844b1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 13 Sep 2017 17:08:57 +0300 Subject: [PATCH 118/145] drm/i915: Nuke the bogus kernel doc for i9xx_disable_pll() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20170913140900.6972-6-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 99a2052864a1..cf13fba80d53 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1628,15 +1628,6 @@ static void i9xx_enable_pll(struct intel_crtc *crtc, } } -/** - * i9xx_disable_pll - disable a PLL - * @dev_priv: i915 private structure - * @pipe: pipe PLL to disable - * - * Disable the PLL for @pipe, making sure the pipe is off first. - * - * Note! This is for pre-ILK only. - */ static void i9xx_disable_pll(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); From 0d5f662575e542ed22b5054c2b0887f42271be5e Mon Sep 17 00:00:00 2001 From: Mika Kahola Date: Tue, 10 Oct 2017 13:17:02 +0300 Subject: [PATCH 119/145] drm/i915: Don't relay on I915_MAX_PIPES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's remove the dependency on I915_MAX_PIPES. Instead, get the number of pipes from platform information. Reviewed-by: Ville Syrjälä Signed-off-by: Ramalingam C Signed-off-by: Mika Kahola Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/1507630626-23806-2-git-send-email-mika.kahola@intel.com --- drivers/gpu/drm/i915/intel_audio.c | 2 +- drivers/gpu/drm/i915/intel_pipe_crc.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 27743be5b768..0ddba16fde1b 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -754,7 +754,7 @@ static struct intel_encoder *get_saved_enc(struct drm_i915_private *dev_priv, { struct intel_encoder *encoder; - if (WARN_ON(pipe >= I915_MAX_PIPES)) + if (WARN_ON(pipe >= INTEL_INFO(dev_priv)->num_pipes)) return NULL; /* MST */ diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index 96043a51c1bf..24d781f97761 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -775,11 +775,12 @@ display_crc_ctl_parse_object(const char *buf, enum intel_pipe_crc_object *o) return -EINVAL; } -static int display_crc_ctl_parse_pipe(const char *buf, enum pipe *pipe) +static int display_crc_ctl_parse_pipe(struct drm_i915_private *dev_priv, + const char *buf, enum pipe *pipe) { const char name = buf[0]; - if (name < 'A' || name >= pipe_name(I915_MAX_PIPES)) + if (name < 'A' || name >= pipe_name(INTEL_INFO(dev_priv)->num_pipes)) return -EINVAL; *pipe = name - 'A'; @@ -828,7 +829,7 @@ static int display_crc_ctl_parse(struct drm_i915_private *dev_priv, return -EINVAL; } - if (display_crc_ctl_parse_pipe(words[1], &pipe) < 0) { + if (display_crc_ctl_parse_pipe(dev_priv, words[1], &pipe) < 0) { DRM_DEBUG_DRIVER("unknown pipe %s\n", words[1]); return -EINVAL; } From 2b68504be6b47fabd4d14c89bbea37a6fd0fd28c Mon Sep 17 00:00:00 2001 From: Mika Kahola Date: Tue, 10 Oct 2017 13:17:03 +0300 Subject: [PATCH 120/145] drm/i915: Remove I915_MAX_PIPES dependency for DDB allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove dependency for I915_MAX_PIPES by replacing it with for_each_pipe() macro. v2: use 'enum pipe pipe' instead of 'i' Reviewed-by: Ville Syrjälä Signed-off-by: Ramalingam C Signed-off-by: Mika Kahola Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/1507630626-23806-3-git-send-email-mika.kahola@intel.com --- drivers/gpu/drm/i915/intel_display.c | 5 ++++- drivers/gpu/drm/i915/intel_drv.h | 3 ++- drivers/gpu/drm/i915/intel_pm.c | 12 +++++++----- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index cf13fba80d53..e03b0c3d6f9f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12215,7 +12215,10 @@ static void skl_update_crtcs(struct drm_atomic_state *state) if (updated & cmask || !cstate->base.active) continue; - if (skl_ddb_allocation_overlaps(entries, &cstate->wm.skl.ddb, i)) + if (skl_ddb_allocation_overlaps(dev_priv, + entries, + &cstate->wm.skl.ddb, + i)) continue; updated |= cmask; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 0cab667fff57..ceb3b8284c86 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1893,7 +1893,8 @@ int intel_enable_sagv(struct drm_i915_private *dev_priv); int intel_disable_sagv(struct drm_i915_private *dev_priv); bool skl_wm_level_equals(const struct skl_wm_level *l1, const struct skl_wm_level *l2); -bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries, +bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv, + const struct skl_ddb_entry **entries, const struct skl_ddb_entry *ddb, int ignore); bool ilk_disable_lp_wm(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 9d0ca2656a23..39acfadb5a21 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4820,16 +4820,18 @@ static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a, return a->start < b->end && b->start < a->end; } -bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries, +bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv, + const struct skl_ddb_entry **entries, const struct skl_ddb_entry *ddb, int ignore) { - int i; + enum pipe pipe; - for (i = 0; i < I915_MAX_PIPES; i++) - if (i != ignore && entries[i] && - skl_ddb_entries_overlap(ddb, entries[i])) + for_each_pipe(dev_priv, pipe) { + if (pipe != ignore && entries[pipe] && + skl_ddb_entries_overlap(ddb, entries[pipe])) return true; + } return false; } From 0a195c02942236c977703023c024b75f5bb6b254 Mon Sep 17 00:00:00 2001 From: Mika Kahola Date: Tue, 10 Oct 2017 13:17:04 +0300 Subject: [PATCH 121/145] drm/i915: Fold IRQ pipe masks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fold IRQ pipe masks into one loop instead of hardcoding per pipe. Reviewed-by: Ville Syrjälä Signed-off-by: Mika Kahola Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/1507630626-23806-4-git-send-email-mika.kahola@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index de777139f6a1..a38ad6043ae5 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3610,16 +3610,15 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) else if (IS_BROADWELL(dev_priv)) de_port_enables |= GEN8_PORT_DP_A_HOTPLUG; - dev_priv->de_irq_mask[PIPE_A] = ~de_pipe_masked; - dev_priv->de_irq_mask[PIPE_B] = ~de_pipe_masked; - dev_priv->de_irq_mask[PIPE_C] = ~de_pipe_masked; + for_each_pipe(dev_priv, pipe) { + dev_priv->de_irq_mask[pipe] = ~de_pipe_masked; - for_each_pipe(dev_priv, pipe) if (intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe))) GEN8_IRQ_INIT_NDX(DE_PIPE, pipe, dev_priv->de_irq_mask[pipe], de_pipe_enables); + } GEN3_IRQ_INIT(GEN8_DE_PORT_, ~de_port_masked, de_port_enables); GEN3_IRQ_INIT(GEN8_DE_MISC_, ~de_misc_masked, de_misc_masked); From 78619e32e57f9f9acd8653b569e48f2d52fa3304 Mon Sep 17 00:00:00 2001 From: Mika Kahola Date: Tue, 10 Oct 2017 13:17:05 +0300 Subject: [PATCH 122/145] drm/i915: Favor for_each_pipe() macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Favor for_each_pipe() macro when looping through pipes. v2: use 'enum pipe pipe' instead of 'i' Reviewed-by: Ville Syrjälä Signed-off-by: Mika Kahola Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/1507630626-23806-5-git-send-email-mika.kahola@intel.com --- drivers/gpu/drm/i915/intel_pipe_crc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index 24d781f97761..899839f2f7c6 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -206,11 +206,11 @@ static const char *pipe_crc_source_name(enum intel_pipe_crc_source source) static int display_crc_ctl_show(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = m->private; - int i; + enum pipe pipe; - for (i = 0; i < I915_MAX_PIPES; i++) - seq_printf(m, "%c %s\n", pipe_name(i), - pipe_crc_source_name(dev_priv->pipe_crc[i].source)); + for_each_pipe(dev_priv, pipe) + seq_printf(m, "%c %s\n", pipe_name(pipe), + pipe_crc_source_name(dev_priv->pipe_crc[pipe].source)); return 0; } From 45c1cd8754064076f6090128958cbde9db45d41b Mon Sep 17 00:00:00 2001 From: Mika Kahola Date: Tue, 10 Oct 2017 13:17:06 +0300 Subject: [PATCH 123/145] drm/i915: Cleanup South Error Interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cleanup and parametrize the handling of South Error Interrupts (SERR_INT). Reviewed-by: Ville Syrjälä Signed-off-by: Mika Kahola Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/1507630626-23806-6-git-send-email-mika.kahola@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 12 ++++-------- drivers/gpu/drm/i915/i915_reg.h | 3 --- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index a38ad6043ae5..3736290f2d0c 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2252,18 +2252,14 @@ static void ivb_err_int_handler(struct drm_i915_private *dev_priv) static void cpt_serr_int_handler(struct drm_i915_private *dev_priv) { u32 serr_int = I915_READ(SERR_INT); + enum pipe pipe; if (serr_int & SERR_INT_POISON) DRM_ERROR("PCH poison interrupt\n"); - if (serr_int & SERR_INT_TRANS_A_FIFO_UNDERRUN) - intel_pch_fifo_underrun_irq_handler(dev_priv, PIPE_A); - - if (serr_int & SERR_INT_TRANS_B_FIFO_UNDERRUN) - intel_pch_fifo_underrun_irq_handler(dev_priv, PIPE_B); - - if (serr_int & SERR_INT_TRANS_C_FIFO_UNDERRUN) - intel_pch_fifo_underrun_irq_handler(dev_priv, PIPE_C); + for_each_pipe(dev_priv, pipe) + if (serr_int & SERR_INT_TRANS_FIFO_UNDERRUN(pipe)) + intel_pch_fifo_underrun_irq_handler(dev_priv, pipe); I915_WRITE(SERR_INT, serr_int); } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 1ebbbc9ad9e7..d2d0a83c09b6 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7182,9 +7182,6 @@ enum { #define SERR_INT _MMIO(0xc4040) #define SERR_INT_POISON (1<<31) -#define SERR_INT_TRANS_C_FIFO_UNDERRUN (1<<6) -#define SERR_INT_TRANS_B_FIFO_UNDERRUN (1<<3) -#define SERR_INT_TRANS_A_FIFO_UNDERRUN (1<<0) #define SERR_INT_TRANS_FIFO_UNDERRUN(pipe) (1<<((pipe)*3)) /* digital port hotplug */ From 06ea8c537ab8d14454d41599a64d46364fe1a75a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 10 Oct 2017 12:10:05 +0100 Subject: [PATCH 124/145] drm/i915: Silently fallback to 4k scratch If we fail to allocate a 64k hugepage for scratch, we try again with a normal 4k page (with some loss of efficiency at runtime). As we handle this gracefully, we do not need a noisy allocation failure warning. Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171010111005.13625-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 4c605785e2b3..ca7fd34fbe8b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -538,7 +538,7 @@ setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) if (i915_vm_is_48bit(vm) && HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) { order = get_order(I915_GTT_PAGE_SIZE_64K); - page = alloc_pages(gfp | __GFP_ZERO, order); + page = alloc_pages(gfp | __GFP_ZERO | __GFP_NOWARN, order); if (page) { addr = dma_map_page(vm->dma, page, 0, I915_GTT_PAGE_SIZE_64K, From 617dc7610d9f5f64ec79cfdbdd4fb42cfcd93ee9 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 10 Oct 2017 14:30:30 +0100 Subject: [PATCH 125/145] drm/i915/selftests: ditch the kernel context There's really no good reason to be using the kernel context for the huge-page livetests. Also with the introduction of commit bef27bdb6cfb ("drm/i915: Assert we do not try to expand VMA for hugepage inside GGTT") we start hitting the bug on in the selftests, since the kernel context will always return true for i915_vma_is_ggtt(), so now seems like the opportune time to instead create our own context. Fixes: 4049866f0913 ("drm/i915/selftests: huge page tests") Fixes: bef27bdb6cfb ("drm/i915: Assert we do not try to expand VMA for hugepage inside GGTT") Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171010133030.12112-1-matthew.auld@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/selftests/huge_pages.c | 57 ++++++++++++++------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index b8b9d0822199..c53f8474113a 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -1047,10 +1047,10 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) return err; } -static int igt_write_huge(struct drm_i915_gem_object *obj) +static int igt_write_huge(struct i915_gem_context *ctx, + struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_gem_context *ctx = i915->kernel_context; struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; struct intel_engine_cs *engine; struct i915_vma *vma; @@ -1149,7 +1149,8 @@ out_vma_close: static int igt_ppgtt_exhaust_huge(void *arg) { - struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; unsigned long supported = INTEL_INFO(i915)->page_sizes; static unsigned int pages[ARRAY_SIZE(page_sizes)]; struct drm_i915_gem_object *obj; @@ -1220,7 +1221,7 @@ static int igt_ppgtt_exhaust_huge(void *arg) /* Force the page-size for the gtt insertion */ obj->mm.page_sizes.sg = page_sizes; - err = igt_write_huge(obj); + err = igt_write_huge(ctx, obj); if (err) { pr_err("exhaust write-huge failed with size=%u\n", size); @@ -1245,7 +1246,8 @@ out_device: static int igt_ppgtt_internal_huge(void *arg) { - struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; static const unsigned int sizes[] = { SZ_64K, @@ -1280,7 +1282,7 @@ static int igt_ppgtt_internal_huge(void *arg) goto out_unpin; } - err = igt_write_huge(obj); + err = igt_write_huge(ctx, obj); if (err) { pr_err("internal write-huge failed with size=%u\n", size); @@ -1308,7 +1310,8 @@ static inline bool igt_can_allocate_thp(struct drm_i915_private *i915) static int igt_ppgtt_gemfs_huge(void *arg) { - struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; static const unsigned int sizes[] = { SZ_2M, @@ -1347,7 +1350,7 @@ static int igt_ppgtt_gemfs_huge(void *arg) goto out_unpin; } - err = igt_write_huge(obj); + err = igt_write_huge(ctx, obj); if (err) { pr_err("gemfs write-huge failed with size=%u\n", size); @@ -1370,9 +1373,10 @@ out_put: static int igt_ppgtt_pin_update(void *arg) { - struct drm_i915_private *dev_priv = arg; + struct i915_gem_context *ctx = arg; + struct drm_i915_private *dev_priv = ctx->i915; unsigned long supported = INTEL_INFO(dev_priv)->page_sizes; - struct i915_hw_ppgtt *ppgtt = dev_priv->kernel_context->ppgtt; + struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; struct drm_i915_gem_object *obj; struct i915_vma *vma; unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; @@ -1473,8 +1477,7 @@ static int igt_ppgtt_pin_update(void *arg) * land in the now stale 2M page. */ - err = gpu_write(vma, dev_priv->kernel_context, dev_priv->engine[RCS], - 0, 0xdeadbeaf); + err = gpu_write(vma, ctx, dev_priv->engine[RCS], 0, 0xdeadbeaf); if (err) goto out_unpin; @@ -1492,9 +1495,9 @@ out_put: static int igt_tmpfs_fallback(void *arg) { - struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; struct vfsmount *gemfs = i915->mm.gemfs; - struct i915_gem_context *ctx = i915->kernel_context; struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; struct drm_i915_gem_object *obj; struct i915_vma *vma; @@ -1549,8 +1552,8 @@ out_restore: static int igt_shrink_thp(void *arg) { - struct drm_i915_private *i915 = arg; - struct i915_gem_context *ctx = i915->kernel_context; + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; struct drm_i915_gem_object *obj; struct i915_vma *vma; @@ -1590,8 +1593,7 @@ static int igt_shrink_thp(void *arg) if (err) goto out_unpin; - err = gpu_write(vma, i915->kernel_context, i915->engine[RCS], 0, - 0xdeadbeaf); + err = gpu_write(vma, ctx, i915->engine[RCS], 0, 0xdeadbeaf); if (err) goto out_unpin; @@ -1700,6 +1702,8 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) SUBTEST(igt_ppgtt_gemfs_huge), SUBTEST(igt_ppgtt_internal_huge), }; + struct drm_file *file; + struct i915_gem_context *ctx; int err; if (!USES_PPGTT(dev_priv)) { @@ -1707,9 +1711,24 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) return 0; } + file = mock_file(dev_priv); + if (IS_ERR(file)) + return PTR_ERR(file); + mutex_lock(&dev_priv->drm.struct_mutex); - err = i915_subtests(tests, dev_priv); + + ctx = live_context(dev_priv, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_unlock; + } + + err = i915_subtests(tests, ctx); + +out_unlock: mutex_unlock(&dev_priv->drm.struct_mutex); + mock_file_free(dev_priv, file); + return err; } From 960e54652ceed1c0f49969c5d951688eab1d49cb Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Tue, 10 Oct 2017 22:29:59 +0100 Subject: [PATCH 126/145] drm/i915: Separate RPS and RC6 handling for gen6+ This patch separates enable/disable of RC6 and RPS for gen6+ platforms prior to VLV. v2: Fixed checkpatch issue. (Sagar) Signed-off-by: Sagar Arun Kamble Cc: Imre Deak Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Radoslaw Szwichtenberg #1 Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1507360055-19948-2-git-send-email-sagar.a.kamble@intel.com Acked-by: Imre Deak Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171010213010.7415-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 17 ++++++------ drivers/gpu/drm/i915/intel_pm.c | 43 +++++++++++++++++++---------- 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 5b58d2b897c7..a904f4e69c66 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1190,6 +1190,13 @@ static int i915_frequency_info(struct seq_file *m, void *unused) pm_iir = I915_READ(GEN8_GT_IIR(2)); pm_mask = I915_READ(GEN6_PMINTRMSK); } + seq_printf(m, "Video Turbo Mode: %s\n", + yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); + seq_printf(m, "HW control enabled: %s\n", + yesno(rpmodectl & GEN6_RP_ENABLE)); + seq_printf(m, "SW control enabled: %s\n", + yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == + GEN6_RP_MEDIA_SW_MODE)); seq_printf(m, "PM IER=0x%08x IMR=0x%08x ISR=0x%08x IIR=0x%08x, MASK=0x%08x\n", pm_ier, pm_imr, pm_isr, pm_iir, pm_mask); seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n", @@ -1533,7 +1540,7 @@ static int vlv_drpc_info(struct seq_file *m) static int gen6_drpc_info(struct seq_file *m) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - u32 rpmodectl1, gt_core_status, rcctl1, rc6vids = 0; + u32 gt_core_status, rcctl1, rc6vids = 0; u32 gen9_powergate_enable = 0, gen9_powergate_status = 0; unsigned forcewake_count; int count = 0; @@ -1552,7 +1559,6 @@ static int gen6_drpc_info(struct seq_file *m) gt_core_status = I915_READ_FW(GEN6_GT_CORE_STATUS); trace_i915_reg_rw(false, GEN6_GT_CORE_STATUS, gt_core_status, 4, true); - rpmodectl1 = I915_READ(GEN6_RP_CONTROL); rcctl1 = I915_READ(GEN6_RC_CONTROL); if (INTEL_GEN(dev_priv) >= 9) { gen9_powergate_enable = I915_READ(GEN9_PG_ENABLE); @@ -1563,13 +1569,6 @@ static int gen6_drpc_info(struct seq_file *m) sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); mutex_unlock(&dev_priv->rps.hw_lock); - seq_printf(m, "Video Turbo Mode: %s\n", - yesno(rpmodectl1 & GEN6_RP_MEDIA_TURBO)); - seq_printf(m, "HW control enabled: %s\n", - yesno(rpmodectl1 & GEN6_RP_ENABLE)); - seq_printf(m, "SW control enabled: %s\n", - yesno((rpmodectl1 & GEN6_RP_MEDIA_MODE_MASK) == - GEN6_RP_MEDIA_SW_MODE)); seq_printf(m, "RC1e Enabled: %s\n", yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE)); seq_printf(m, "RC6 Enabled: %s\n", diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 39acfadb5a21..dfa9afe9cb61 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6334,9 +6334,13 @@ static void gen9_disable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RP_CONTROL, 0); } -static void gen6_disable_rps(struct drm_i915_private *dev_priv) +static void gen6_disable_rc6(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RC_CONTROL, 0); +} + +static void gen6_disable_rps(struct drm_i915_private *dev_priv) +{ I915_WRITE(GEN6_RPNSWREQ, 1 << 31); I915_WRITE(GEN6_RP_CONTROL, 0); } @@ -6694,7 +6698,7 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static void gen6_enable_rps(struct drm_i915_private *dev_priv) +static void gen6_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -6705,12 +6709,6 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - /* Here begins a magic sequence of register writes to enable - * auto-downclocking. - * - * Perhaps there might be some value in exposing these to - * userspace... - */ I915_WRITE(GEN6_RC_STATE, 0); /* Clear the DBG now so we don't confuse earlier errors */ @@ -6764,12 +6762,6 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) GEN6_RC_CTL_EI_MODE(1) | GEN6_RC_CTL_HW_ENABLE); - /* Power down if completely idle for over 50ms */ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - - reset_rps(dev_priv, gen6_set_rps); - rc6vids = 0; ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); if (IS_GEN6(dev_priv) && ret) { @@ -6787,6 +6779,27 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } +static void gen6_enable_rps(struct drm_i915_private *dev_priv) +{ + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + + /* Here begins a magic sequence of register writes to enable + * auto-downclocking. + * + * Perhaps there might be some value in exposing these to + * userspace... + */ + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + /* Power down if completely idle for over 50ms */ + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + + reset_rps(dev_priv, gen6_set_rps); + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); +} + static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) { int min_freq = 15; @@ -7936,6 +7949,7 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) } else if (IS_VALLEYVIEW(dev_priv)) { valleyview_disable_rps(dev_priv); } else if (INTEL_GEN(dev_priv) >= 6) { + gen6_disable_rc6(dev_priv); gen6_disable_rps(dev_priv); } else if (IS_IRONLAKE_M(dev_priv)) { ironlake_disable_drps(dev_priv); @@ -7972,6 +7986,7 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) gen8_enable_rps(dev_priv); gen6_update_ring_freq(dev_priv); } else if (INTEL_GEN(dev_priv) >= 6) { + gen6_enable_rc6(dev_priv); gen6_enable_rps(dev_priv); gen6_update_ring_freq(dev_priv); } else if (IS_IRONLAKE_M(dev_priv)) { From 415544d5a89fb2be3b12dc7c4682806323aabdbf Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Tue, 10 Oct 2017 22:30:00 +0100 Subject: [PATCH 127/145] drm/i915: Remove superfluous IS_BDW checks and non-BDW changes from gen8_enable_rps This patch removes all IS_BROADWELL checks and non-BDW changes from gen8_enable_rps as it is called only for BROADWELL. Suggested-by: Chris Wilson Signed-off-by: Sagar Arun Kamble Cc: Imre Deak Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Radoslaw Szwichtenberg Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1507360055-19948-3-git-send-email-sagar.a.kamble@intel.com Acked-by: Imre Deak Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171010213010.7415-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_pm.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index dfa9afe9cb61..21a72f660e0f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6644,23 +6644,16 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); I915_WRITE(GEN6_RC_SLEEP, 0); - if (IS_BROADWELL(dev_priv)) - I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ - else - I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ /* 3: Enable RC6 */ if (intel_enable_rc6() & INTEL_RC6_ENABLE) rc6_mask = GEN6_RC_CTL_RC6_ENABLE; intel_print_rc6_info(dev_priv, rc6_mask); - if (IS_BROADWELL(dev_priv)) - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN7_RC_CTL_TO_MODE | - rc6_mask); - else - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_EI_MODE(1) | - rc6_mask); + + I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | + GEN7_RC_CTL_TO_MODE | + rc6_mask); /* 4 Program defaults and thresholds for RPS*/ I915_WRITE(GEN6_RPNSWREQ, From 3a85392c0ea0689d8d3ba1f9c9df5d3e32bfa517 Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Tue, 10 Oct 2017 22:30:01 +0100 Subject: [PATCH 128/145] drm/i915: Separate RPS and RC6 handling for BDW This patch separates RC6 and RPS enabling for BDW. RC6/RPS Disabling are handled through gen6 functions. PM Programming guide recommends a sequence within forcewakes to configure RC6, RPS and ring frequencies in sequence. With this patch the order is still maintained. v2: Update sequence numbers in RC6 programming and comment about intent of reset_rps during gen8_enable_rps. (Radoslaw) v3: Rebase. Signed-off-by: Sagar Arun Kamble Cc: Imre Deak Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Radoslaw Szwichtenberg Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1507360055-19948-4-git-send-email-sagar.a.kamble@intel.com Acked-by: Imre Deak Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171010213010.7415-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_pm.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 21a72f660e0f..540e23ab51df 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6621,7 +6621,7 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static void gen8_enable_rps(struct drm_i915_private *dev_priv) +static void gen8_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -6630,7 +6630,7 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) /* 1a: Software RC state - RC0 */ I915_WRITE(GEN6_RC_STATE, 0); - /* 1c & 1d: Get forcewake during program sequence. Although the driver + /* 1b: Get forcewake during program sequence. Although the driver * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -6655,7 +6655,14 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) GEN7_RC_CTL_TO_MODE | rc6_mask); - /* 4 Program defaults and thresholds for RPS*/ + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); +} + +static void gen8_enable_rps(struct drm_i915_private *dev_priv) +{ + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + /* 1 Program defaults and thresholds for RPS*/ I915_WRITE(GEN6_RPNSWREQ, HSW_FREQUENCY(dev_priv->rps.rp1_freq)); I915_WRITE(GEN6_RC_VIDEO_FREQ, @@ -6675,7 +6682,7 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - /* 5: Enable RPS */ + /* 2: Enable RPS */ I915_WRITE(GEN6_RP_CONTROL, GEN6_RP_MEDIA_TURBO | GEN6_RP_MEDIA_HW_NORMAL_MODE | @@ -6684,8 +6691,6 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_AVG); - /* 6: Ring frequency + overclocking (our driver does this later */ - reset_rps(dev_priv, gen6_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); @@ -7976,6 +7981,7 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) gen6_update_ring_freq(dev_priv); } else if (IS_BROADWELL(dev_priv)) { + gen8_enable_rc6(dev_priv); gen8_enable_rps(dev_priv); gen6_update_ring_freq(dev_priv); } else if (INTEL_GEN(dev_priv) >= 6) { From 0d6fc92a73e0c4dd8635268ef0ffb852986bba89 Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Tue, 10 Oct 2017 22:30:02 +0100 Subject: [PATCH 129/145] drm/i915: Separate RPS and RC6 handling for VLV This patch separates enable/disable of RC6 and RPS for VLV. v2: Removed unnecessary comments about forcewakes while enabling RC6/RPS. Added changes to output turbo control status for VLV in i915_frequency_info. Signed-off-by: Sagar Arun Kamble Cc: Imre Deak Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Radoslaw Szwichtenberg Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1507360055-19948-5-git-send-email-sagar.a.kamble@intel.com Acked-by: Imre Deak Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171010213010.7415-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 24 ++++----- drivers/gpu/drm/i915/intel_pm.c | 75 ++++++++++++++++++----------- 2 files changed, 58 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index a904f4e69c66..31ab92eda45d 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1095,9 +1095,19 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "Current P-state: %d\n", (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT); } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - u32 freq_sts; + u32 rpmodectl, freq_sts; mutex_lock(&dev_priv->rps.hw_lock); + + rpmodectl = I915_READ(GEN6_RP_CONTROL); + seq_printf(m, "Video Turbo Mode: %s\n", + yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); + seq_printf(m, "HW control enabled: %s\n", + yesno(rpmodectl & GEN6_RP_ENABLE)); + seq_printf(m, "SW control enabled: %s\n", + yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == + GEN6_RP_MEDIA_SW_MODE)); + freq_sts = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts); seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq); @@ -1508,21 +1518,11 @@ static void print_rc6_res(struct seq_file *m, static int vlv_drpc_info(struct seq_file *m) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - u32 rpmodectl1, rcctl1, pw_status; + u32 rcctl1, pw_status; pw_status = I915_READ(VLV_GTLC_PW_STATUS); - rpmodectl1 = I915_READ(GEN6_RP_CONTROL); rcctl1 = I915_READ(GEN6_RC_CONTROL); - seq_printf(m, "Video Turbo Mode: %s\n", - yesno(rpmodectl1 & GEN6_RP_MEDIA_TURBO)); - seq_printf(m, "Turbo enabled: %s\n", - yesno(rpmodectl1 & GEN6_RP_ENABLE)); - seq_printf(m, "HW control enabled: %s\n", - yesno(rpmodectl1 & GEN6_RP_ENABLE)); - seq_printf(m, "SW control enabled: %s\n", - yesno((rpmodectl1 & GEN6_RP_MEDIA_MODE_MASK) == - GEN6_RP_MEDIA_SW_MODE)); seq_printf(m, "RC6 Enabled: %s\n", yesno(rcctl1 & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1)))); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 540e23ab51df..5fb08271b91c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6350,9 +6350,9 @@ static void cherryview_disable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RC_CONTROL, 0); } -static void valleyview_disable_rps(struct drm_i915_private *dev_priv) +static void valleyview_disable_rc6(struct drm_i915_private *dev_priv) { - /* we're doing forcewake before Disabling RC6, + /* We're doing forcewake before Disabling RC6, * This what the BIOS expects when going into suspend */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -6361,6 +6361,11 @@ static void valleyview_disable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } +static void valleyview_disable_rps(struct drm_i915_private *dev_priv) +{ + I915_WRITE(GEN6_RP_CONTROL, 0); +} + static void intel_print_rc6_info(struct drm_i915_private *dev_priv, u32 mode) { if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { @@ -7283,11 +7288,11 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static void valleyview_enable_rps(struct drm_i915_private *dev_priv) +static void valleyview_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; - u32 gtfifodbg, val, rc6_mode = 0; + u32 gtfifodbg, rc6_mode = 0; WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); @@ -7300,12 +7305,46 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GTFIFODBG, gtfifodbg); } - /* If VLV, Forcewake all wells, else re-direct to regular path */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); /* Disable RC states. */ I915_WRITE(GEN6_RC_CONTROL, 0); + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); + I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); + I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); + + for_each_engine(engine, dev_priv, id) + I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); + + I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); + + /* Allows RC6 residency counter to work */ + I915_WRITE(VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | + VLV_MEDIA_RC0_COUNT_EN | + VLV_RENDER_RC0_COUNT_EN | + VLV_MEDIA_RC6_COUNT_EN | + VLV_RENDER_RC6_COUNT_EN)); + + if (intel_enable_rc6() & INTEL_RC6_ENABLE) + rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; + + intel_print_rc6_info(dev_priv, rc6_mode); + + I915_WRITE(GEN6_RC_CONTROL, rc6_mode); + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); +} + +static void valleyview_enable_rps(struct drm_i915_private *dev_priv) +{ + u32 val; + + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); @@ -7322,30 +7361,6 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_CONT); - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); - - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - - I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); - - /* allows RC6 residency counter to work */ - I915_WRITE(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | - VLV_MEDIA_RC0_COUNT_EN | - VLV_RENDER_RC0_COUNT_EN | - VLV_MEDIA_RC6_COUNT_EN | - VLV_RENDER_RC6_COUNT_EN)); - - if (intel_enable_rc6() & INTEL_RC6_ENABLE) - rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; - - intel_print_rc6_info(dev_priv, rc6_mode); - - I915_WRITE(GEN6_RC_CONTROL, rc6_mode); - /* Setting Fixed Bias */ val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | @@ -7945,6 +7960,7 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) } else if (IS_CHERRYVIEW(dev_priv)) { cherryview_disable_rps(dev_priv); } else if (IS_VALLEYVIEW(dev_priv)) { + valleyview_disable_rc6(dev_priv); valleyview_disable_rps(dev_priv); } else if (INTEL_GEN(dev_priv) >= 6) { gen6_disable_rc6(dev_priv); @@ -7974,6 +7990,7 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) if (IS_CHERRYVIEW(dev_priv)) { cherryview_enable_rps(dev_priv); } else if (IS_VALLEYVIEW(dev_priv)) { + valleyview_enable_rc6(dev_priv); valleyview_enable_rps(dev_priv); } else if (INTEL_GEN(dev_priv) >= 9) { gen9_enable_rc6(dev_priv); From d46b00dc38c8aea80357a0dd04f57c097dbfa5b9 Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Tue, 10 Oct 2017 22:30:03 +0100 Subject: [PATCH 130/145] drm/i915: Separate RPS and RC6 handling for CHV This patch separates enable/disable of RC6 and RPS for CHV. v2: Fixed comment. Signed-off-by: Sagar Arun Kamble Cc: Imre Deak Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Radoslaw Szwichtenberg Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1507360055-19948-6-git-send-email-sagar.a.kamble@intel.com Acked-by: Imre Deak Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171010213010.7415-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_pm.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 5fb08271b91c..4843e88a7f35 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6345,11 +6345,16 @@ static void gen6_disable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RP_CONTROL, 0); } -static void cherryview_disable_rps(struct drm_i915_private *dev_priv) +static void cherryview_disable_rc6(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RC_CONTROL, 0); } +static void cherryview_disable_rps(struct drm_i915_private *dev_priv) +{ + I915_WRITE(GEN6_RP_CONTROL, 0); +} + static void valleyview_disable_rc6(struct drm_i915_private *dev_priv) { /* We're doing forcewake before Disabling RC6, @@ -7199,11 +7204,11 @@ static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv) valleyview_cleanup_pctx(dev_priv); } -static void cherryview_enable_rps(struct drm_i915_private *dev_priv) +static void cherryview_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; - u32 gtfifodbg, val, rc6_mode = 0, pcbr; + u32 gtfifodbg, rc6_mode = 0, pcbr; WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); @@ -7236,7 +7241,7 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) /* TO threshold set to 500 us ( 0x186 * 1.28 us) */ I915_WRITE(GEN6_RC6_THRESHOLD, 0x186); - /* allows RC6 residency counter to work */ + /* Allows RC6 residency counter to work */ I915_WRITE(VLV_COUNTER_CONTROL, _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | VLV_MEDIA_RC6_COUNT_EN | @@ -7252,7 +7257,18 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RC_CONTROL, rc6_mode); - /* 4 Program defaults and thresholds for RPS*/ + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); +} + +static void cherryview_enable_rps(struct drm_i915_private *dev_priv) +{ + u32 val; + + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + /* 1: Program defaults and thresholds for RPS*/ I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); @@ -7261,7 +7277,7 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - /* 5: Enable RPS */ + /* 2: Enable RPS */ I915_WRITE(GEN6_RP_CONTROL, GEN6_RP_MEDIA_HW_NORMAL_MODE | GEN6_RP_MEDIA_IS_GFX | @@ -7958,6 +7974,7 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) gen9_disable_rc6(dev_priv); gen9_disable_rps(dev_priv); } else if (IS_CHERRYVIEW(dev_priv)) { + cherryview_disable_rc6(dev_priv); cherryview_disable_rps(dev_priv); } else if (IS_VALLEYVIEW(dev_priv)) { valleyview_disable_rc6(dev_priv); @@ -7988,6 +8005,7 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->rps.hw_lock); if (IS_CHERRYVIEW(dev_priv)) { + cherryview_enable_rc6(dev_priv); cherryview_enable_rps(dev_priv); } else if (IS_VALLEYVIEW(dev_priv)) { valleyview_enable_rc6(dev_priv); From ad1443f0f3dd1b2434af897af8b8f942e47cf8c3 Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Tue, 10 Oct 2017 22:30:04 +0100 Subject: [PATCH 131/145] drm/i915: Name i915_runtime_pm structure in dev_priv as "runtime_pm" We were using dev_priv->pm for runtime power management related state. This patch renames it to "runtime_pm" which looks more apt. v2: s/rpm/runtime_pm (Chris) Signed-off-by: Sagar Arun Kamble Cc: Imre Deak Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Radoslaw Szwichtenberg #1 Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1507360055-19948-7-git-send-email-sagar.a.kamble@intel.com Acked-by: Imre Deak Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171010213010.7415-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 8 ++++---- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 4 ++-- drivers/gpu/drm/i915/i915_irq.c | 8 ++++---- drivers/gpu/drm/i915/intel_drv.h | 10 +++++----- drivers/gpu/drm/i915/intel_pm.c | 4 ++-- drivers/gpu/drm/i915/intel_runtime_pm.c | 10 +++++----- 7 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 66fc156b294a..9ebbb08dcf2d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2544,12 +2544,12 @@ static int intel_runtime_suspend(struct device *kdev) intel_uncore_suspend(dev_priv); enable_rpm_wakeref_asserts(dev_priv); - WARN_ON_ONCE(atomic_read(&dev_priv->pm.wakeref_count)); + WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count)); if (intel_uncore_arm_unclaimed_mmio_detection(dev_priv)) DRM_ERROR("Unclaimed access detected prior to suspending\n"); - dev_priv->pm.suspended = true; + dev_priv->runtime_pm.suspended = true; /* * FIXME: We really should find a document that references the arguments @@ -2595,11 +2595,11 @@ static int intel_runtime_resume(struct device *kdev) DRM_DEBUG_KMS("Resuming device\n"); - WARN_ON_ONCE(atomic_read(&dev_priv->pm.wakeref_count)); + WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count)); disable_rpm_wakeref_asserts(dev_priv); intel_opregion_notify_adapter(dev_priv, PCI_D0); - dev_priv->pm.suspended = false; + dev_priv->runtime_pm.suspended = false; if (intel_uncore_unclaimed_mmio(dev_priv)) DRM_DEBUG_DRIVER("Unclaimed access during suspend, bios?\n"); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 770305bdeabb..f44027f6e5e1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2533,7 +2533,7 @@ struct drm_i915_private { bool distrust_bios_wm; } wm; - struct i915_runtime_pm pm; + struct i915_runtime_pm runtime_pm; struct { bool initialized; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index dc91b32d699e..653fb69e7ecb 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1674,8 +1674,8 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv, struct i915_gpu_state *error) { error->awake = dev_priv->gt.awake; - error->wakelock = atomic_read(&dev_priv->pm.wakeref_count); - error->suspended = dev_priv->pm.suspended; + error->wakelock = atomic_read(&dev_priv->runtime_pm.wakeref_count); + error->suspended = dev_priv->runtime_pm.suspended; error->iommu = -1; #ifdef CONFIG_INTEL_IOMMU diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 3736290f2d0c..915c5b9dc547 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -4173,7 +4173,7 @@ int intel_irq_install(struct drm_i915_private *dev_priv) * interrupts as enabled _before_ actually enabling them to avoid * special cases in our ordering checks. */ - dev_priv->pm.irqs_enabled = true; + dev_priv->runtime_pm.irqs_enabled = true; return drm_irq_install(&dev_priv->drm, dev_priv->drm.pdev->irq); } @@ -4189,7 +4189,7 @@ void intel_irq_uninstall(struct drm_i915_private *dev_priv) { drm_irq_uninstall(&dev_priv->drm); intel_hpd_cancel_work(dev_priv); - dev_priv->pm.irqs_enabled = false; + dev_priv->runtime_pm.irqs_enabled = false; } /** @@ -4202,7 +4202,7 @@ void intel_irq_uninstall(struct drm_i915_private *dev_priv) void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv) { dev_priv->drm.driver->irq_uninstall(&dev_priv->drm); - dev_priv->pm.irqs_enabled = false; + dev_priv->runtime_pm.irqs_enabled = false; synchronize_irq(dev_priv->drm.irq); } @@ -4215,7 +4215,7 @@ void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv) */ void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv) { - dev_priv->pm.irqs_enabled = true; + dev_priv->runtime_pm.irqs_enabled = true; dev_priv->drm.driver->irq_preinstall(&dev_priv->drm); dev_priv->drm.driver->irq_postinstall(&dev_priv->drm); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index ceb3b8284c86..3fd428b99c37 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1254,7 +1254,7 @@ static inline bool intel_irqs_enabled(struct drm_i915_private *dev_priv) * We only use drm_irq_uninstall() at unload and VT switch, so * this is the only thing we need to check. */ - return dev_priv->pm.irqs_enabled; + return dev_priv->runtime_pm.irqs_enabled; } int intel_get_crtc_scanline(struct intel_crtc *crtc); @@ -1790,7 +1790,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, static inline void assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv) { - WARN_ONCE(dev_priv->pm.suspended, + WARN_ONCE(dev_priv->runtime_pm.suspended, "Device suspended during HW access\n"); } @@ -1798,7 +1798,7 @@ static inline void assert_rpm_wakelock_held(struct drm_i915_private *dev_priv) { assert_rpm_device_not_suspended(dev_priv); - WARN_ONCE(!atomic_read(&dev_priv->pm.wakeref_count), + WARN_ONCE(!atomic_read(&dev_priv->runtime_pm.wakeref_count), "RPM wakelock ref not held during HW access"); } @@ -1823,7 +1823,7 @@ assert_rpm_wakelock_held(struct drm_i915_private *dev_priv) static inline void disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) { - atomic_inc(&dev_priv->pm.wakeref_count); + atomic_inc(&dev_priv->runtime_pm.wakeref_count); } /** @@ -1840,7 +1840,7 @@ disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) static inline void enable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) { - atomic_dec(&dev_priv->pm.wakeref_count); + atomic_dec(&dev_priv->runtime_pm.wakeref_count); } void intel_runtime_pm_get(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 4843e88a7f35..831054084fb7 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -9350,8 +9350,8 @@ void intel_pm_setup(struct drm_i915_private *dev_priv) __intel_autoenable_gt_powersave); atomic_set(&dev_priv->rps.num_waiters, 0); - dev_priv->pm.suspended = false; - atomic_set(&dev_priv->pm.wakeref_count, 0); + dev_priv->runtime_pm.suspended = false; + atomic_set(&dev_priv->runtime_pm.wakeref_count, 0); } static u64 vlv_residency_raw(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index c4e1aba83c3e..7348c16c4a96 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -187,7 +187,7 @@ bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, struct i915_power_well *power_well; bool is_enabled; - if (dev_priv->pm.suspended) + if (dev_priv->runtime_pm.suspended) return false; is_enabled = true; @@ -3128,7 +3128,7 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv) ret = pm_runtime_get_sync(kdev); WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret); - atomic_inc(&dev_priv->pm.wakeref_count); + atomic_inc(&dev_priv->runtime_pm.wakeref_count); assert_rpm_wakelock_held(dev_priv); } @@ -3162,7 +3162,7 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv) return false; } - atomic_inc(&dev_priv->pm.wakeref_count); + atomic_inc(&dev_priv->runtime_pm.wakeref_count); assert_rpm_wakelock_held(dev_priv); return true; @@ -3193,7 +3193,7 @@ void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv) assert_rpm_wakelock_held(dev_priv); pm_runtime_get_noresume(kdev); - atomic_inc(&dev_priv->pm.wakeref_count); + atomic_inc(&dev_priv->runtime_pm.wakeref_count); } /** @@ -3210,7 +3210,7 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv) struct device *kdev = &pdev->dev; assert_rpm_wakelock_held(dev_priv); - atomic_dec(&dev_priv->pm.wakeref_count); + atomic_dec(&dev_priv->runtime_pm.wakeref_count); pm_runtime_mark_last_busy(kdev); pm_runtime_put_autosuspend(kdev); From 9f817501bd7facfe2bffacd637f4332e5991e57a Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Tue, 10 Oct 2017 22:30:05 +0100 Subject: [PATCH 132/145] drm/i915: Move rps.hw_lock to dev_priv and s/hw_lock/pcu_lock In order to separate GT PM related functionality into new structure we are updating rps structure. hw_lock in it is used for display related PCU communication too hence move it to dev_priv. Signed-off-by: Sagar Arun Kamble Cc: Imre Deak Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Radoslaw Szwichtenberg Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1507360055-19948-8-git-send-email-sagar.a.kamble@intel.com Acked-by: Imre Deak Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171010213010.7415-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 24 ++++----- drivers/gpu/drm/i915/i915_drv.h | 16 +++--- drivers/gpu/drm/i915/i915_irq.c | 4 +- drivers/gpu/drm/i915/i915_sysfs.c | 20 +++---- drivers/gpu/drm/i915/intel_cdclk.c | 40 +++++++------- drivers/gpu/drm/i915/intel_display.c | 12 ++--- drivers/gpu/drm/i915/intel_pm.c | 72 ++++++++++++------------- drivers/gpu/drm/i915/intel_runtime_pm.c | 16 +++--- drivers/gpu/drm/i915/intel_sideband.c | 6 +-- 9 files changed, 105 insertions(+), 105 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 31ab92eda45d..e733097fa647 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1097,7 +1097,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { u32 rpmodectl, freq_sts; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); rpmodectl = I915_READ(GEN6_RP_CONTROL); seq_printf(m, "Video Turbo Mode: %s\n", @@ -1130,7 +1130,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "efficient (RPe) frequency: %d MHz\n", intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq)); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } else if (INTEL_GEN(dev_priv) >= 6) { u32 rp_state_limits; u32 gt_perf_status; @@ -1565,9 +1565,9 @@ static int gen6_drpc_info(struct seq_file *m) gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS); } - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); seq_printf(m, "RC1e Enabled: %s\n", yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE)); @@ -1842,7 +1842,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); + ret = mutex_lock_interruptible(&dev_priv->pcu_lock); if (ret) goto out; @@ -1873,7 +1873,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) ((ia_freq >> 8) & 0xff) * 100); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); out: intel_runtime_pm_put(dev_priv); @@ -4320,7 +4320,7 @@ i915_max_freq_set(void *data, u64 val) DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val); - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); + ret = mutex_lock_interruptible(&dev_priv->pcu_lock); if (ret) return ret; @@ -4333,7 +4333,7 @@ i915_max_freq_set(void *data, u64 val) hw_min = dev_priv->rps.min_freq; if (val < hw_min || val > hw_max || val < dev_priv->rps.min_freq_softlimit) { - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); return -EINVAL; } @@ -4342,7 +4342,7 @@ i915_max_freq_set(void *data, u64 val) if (intel_set_rps(dev_priv, val)) DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); return 0; } @@ -4375,7 +4375,7 @@ i915_min_freq_set(void *data, u64 val) DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val); - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); + ret = mutex_lock_interruptible(&dev_priv->pcu_lock); if (ret) return ret; @@ -4389,7 +4389,7 @@ i915_min_freq_set(void *data, u64 val) if (val < hw_min || val > hw_max || val > dev_priv->rps.max_freq_softlimit) { - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); return -EINVAL; } @@ -4398,7 +4398,7 @@ i915_min_freq_set(void *data, u64 val) if (intel_set_rps(dev_priv, val)) DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f44027f6e5e1..fca7b939495f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1364,14 +1364,6 @@ struct intel_gen6_power_mgmt { /* manual wa residency calculations */ struct intel_rps_ei ei; - - /* - * Protects RPS/RC6 register access and PCU communication. - * Must be taken after struct_mutex if nested. Note that - * this lock may be held for long periods of time when - * talking to hw - so only take it when talking to hw! - */ - struct mutex hw_lock; }; /* defined intel_pm.c */ @@ -2421,6 +2413,14 @@ struct drm_i915_private { /* Cannot be determined by PCIID. You must always read a register. */ u32 edram_cap; + /* + * Protects RPS/RC6 register access and PCU communication. + * Must be taken after struct_mutex if nested. Note that + * this lock may be held for long periods of time when + * talking to hw - so only take it when talking to hw! + */ + struct mutex pcu_lock; + /* gen6+ rps state */ struct intel_gen6_power_mgmt rps; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 915c5b9dc547..1844d3fe8f1f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1181,7 +1181,7 @@ static void gen6_pm_rps_work(struct work_struct *work) if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost) goto out; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir); @@ -1235,7 +1235,7 @@ static void gen6_pm_rps_work(struct work_struct *work) dev_priv->rps.last_adj = 0; } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); out: /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index d61c8727f756..79fbab49d1d0 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -246,7 +246,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { u32 freq; freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); @@ -261,7 +261,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, ret = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; ret = intel_gpu_freq(dev_priv, ret); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); @@ -304,9 +304,9 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, if (val < dev_priv->rps.min_freq || val > dev_priv->rps.max_freq) return -EINVAL; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); dev_priv->rps.boost_freq = val; - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); return count; } @@ -344,14 +344,14 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = intel_freq_opcode(dev_priv, val); if (val < dev_priv->rps.min_freq || val > dev_priv->rps.max_freq || val < dev_priv->rps.min_freq_softlimit) { - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); return -EINVAL; } @@ -371,7 +371,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, * frequency request may be unchanged. */ ret = intel_set_rps(dev_priv, val); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); @@ -401,14 +401,14 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = intel_freq_opcode(dev_priv, val); if (val < dev_priv->rps.min_freq || val > dev_priv->rps.max_freq || val > dev_priv->rps.max_freq_softlimit) { - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); return -EINVAL; } @@ -424,7 +424,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, * frequency request may be unchanged. */ ret = intel_set_rps(dev_priv, val); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 87fc42b19336..b2a6d62b71c0 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -503,7 +503,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, else cmd = 0; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); val &= ~DSPFREQGUAR_MASK; val |= (cmd << DSPFREQGUAR_SHIFT); @@ -513,7 +513,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, 50)) { DRM_ERROR("timed out waiting for CDclk change\n"); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); mutex_lock(&dev_priv->sb_lock); @@ -590,7 +590,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, */ cmd = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); val &= ~DSPFREQGUAR_MASK_CHV; val |= (cmd << DSPFREQGUAR_SHIFT_CHV); @@ -600,7 +600,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, 50)) { DRM_ERROR("timed out waiting for CDclk change\n"); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); @@ -656,10 +656,10 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, "trying to change cdclk frequency with cdclk not enabled\n")) return; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("failed to inform pcode about cdclk change\n"); return; @@ -712,9 +712,9 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) DRM_ERROR("Switching back to LCPLL failed\n"); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, data); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1); @@ -928,12 +928,12 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, WARN_ON((cdclk == 24000) != (vco == 0)); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", ret); @@ -975,9 +975,9 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, POSTING_READ(CDCLK_CTL); /* inform PCU of the change */ - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); } @@ -1268,10 +1268,10 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, } /* Inform power controller of upcoming frequency change */ - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, 0x80000000); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n", @@ -1300,10 +1300,10 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; I915_WRITE(CDCLK_CTL, val); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, DIV_ROUND_UP(cdclk, 25000)); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", @@ -1518,12 +1518,12 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, u32 val, divider, pcu_ack; int ret; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", ret); @@ -1575,9 +1575,9 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, I915_WRITE(CDCLK_CTL, val); /* inform PCU of the change */ - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e03b0c3d6f9f..b2c5fba102e1 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4946,10 +4946,10 @@ void hsw_enable_ips(struct intel_crtc *crtc) assert_plane_enabled(dev_priv, crtc->plane); if (IS_BROADWELL(dev_priv)) { - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, IPS_ENABLE | IPS_PCODE_CONTROL)); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); /* Quoting Art Runyan: "its not safe to expect any particular * value in IPS_CTL bit 31 after enabling IPS through the * mailbox." Moreover, the mailbox may return a bogus state, @@ -4979,9 +4979,9 @@ void hsw_disable_ips(struct intel_crtc *crtc) assert_plane_enabled(dev_priv, crtc->plane); if (IS_BROADWELL(dev_priv)) { - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0)); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); /* wait for pcode to finish disabling IPS, which may take up to 42ms */ if (intel_wait_for_register(dev_priv, IPS_CTL, IPS_ENABLE, 0, @@ -8839,11 +8839,11 @@ static uint32_t hsw_read_dcomp(struct drm_i915_private *dev_priv) static void hsw_write_dcomp(struct drm_i915_private *dev_priv, uint32_t val) { if (IS_HASWELL(dev_priv)) { - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); if (sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_D_COMP, val)) DRM_DEBUG_KMS("Failed to write to D_COMP\n"); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } else { I915_WRITE(D_COMP_BDW, val); POSTING_READ(D_COMP_BDW); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 831054084fb7..512f2b0513e0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -322,7 +322,7 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable) { u32 val; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); if (enable) @@ -337,14 +337,14 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable) FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) DRM_ERROR("timed out waiting for Punit DDR DVFS request\n"); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable) { u32 val; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); if (enable) @@ -353,7 +353,7 @@ static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable) val &= ~DSP_MAXFIFO_PM5_ENABLE; vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } #define FW_WM(value, plane) \ @@ -2790,11 +2790,11 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, /* read the first set of memory latencies[0:3] */ val = 0; /* data0 to be programmed to 0 for first set */ - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_READ_MEM_LATENCY, &val); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("SKL Mailbox read error = %d\n", ret); @@ -2811,11 +2811,11 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, /* read the second set of memory latencies[4:7] */ val = 1; /* data0 to be programmed to 1 for second set */ - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_READ_MEM_LATENCY, &val); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("SKL Mailbox read error = %d\n", ret); return; @@ -3608,13 +3608,13 @@ intel_enable_sagv(struct drm_i915_private *dev_priv) return 0; DRM_DEBUG_KMS("Enabling the SAGV\n"); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL, GEN9_SAGV_ENABLE); /* We don't need to wait for the SAGV when enabling */ - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); /* * Some skl systems, pre-release machines in particular, @@ -3645,14 +3645,14 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) return 0; DRM_DEBUG_KMS("Disabling the SAGV\n"); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); /* bspec says to keep retrying for at least 1 ms */ ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL, GEN9_SAGV_DISABLE, GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED, 1); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); /* * Some skl systems, pre-release machines in particular, @@ -5621,7 +5621,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev) wm->level = VLV_WM_LEVEL_PM2; if (IS_CHERRYVIEW(dev_priv)) { - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); if (val & DSP_MAXFIFO_PM5_ENABLE) @@ -5651,7 +5651,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev) wm->level = VLV_WM_LEVEL_DDR_DVFS; } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } for_each_intel_crtc(dev, crtc) { @@ -6224,7 +6224,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) void gen6_rps_busy(struct drm_i915_private *dev_priv) { - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); if (dev_priv->rps.enabled) { u8 freq; @@ -6247,7 +6247,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) dev_priv->rps.max_freq_softlimit))) DRM_DEBUG_DRIVER("Failed to set idle frequency\n"); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } void gen6_rps_idle(struct drm_i915_private *dev_priv) @@ -6259,7 +6259,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) */ gen6_disable_rps_interrupts(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); if (dev_priv->rps.enabled) { if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_set_rps_idle(dev_priv); @@ -6269,7 +6269,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0)); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } void gen6_rps_boost(struct drm_i915_gem_request *rq, @@ -6306,7 +6306,7 @@ int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) { int err; - lockdep_assert_held(&dev_priv->rps.hw_lock); + lockdep_assert_held(&dev_priv->pcu_lock); GEM_BUG_ON(val > dev_priv->rps.max_freq); GEM_BUG_ON(val < dev_priv->rps.min_freq); @@ -6715,7 +6715,7 @@ static void gen6_enable_rc6(struct drm_i915_private *dev_priv) int rc6_mode; int ret; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); I915_WRITE(GEN6_RC_STATE, 0); @@ -6789,7 +6789,7 @@ static void gen6_enable_rc6(struct drm_i915_private *dev_priv) static void gen6_enable_rps(struct drm_i915_private *dev_priv) { - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); /* Here begins a magic sequence of register writes to enable * auto-downclocking. @@ -6817,7 +6817,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) int scaling_factor = 180; struct cpufreq_policy *policy; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); policy = cpufreq_cpu_get(0); if (policy) { @@ -7210,7 +7210,7 @@ static void cherryview_enable_rc6(struct drm_i915_private *dev_priv) enum intel_engine_id id; u32 gtfifodbg, rc6_mode = 0, pcbr; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV | GT_FIFO_FREE_ENTRIES_CHV); @@ -7264,7 +7264,7 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) { u32 val; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -7310,7 +7310,7 @@ static void valleyview_enable_rc6(struct drm_i915_private *dev_priv) enum intel_engine_id id; u32 gtfifodbg, rc6_mode = 0; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); valleyview_check_pctx(dev_priv); @@ -7357,7 +7357,7 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv) { u32 val; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -7881,7 +7881,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) } mutex_lock(&dev_priv->drm.struct_mutex); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); /* Initialize RPS limits (for userspace) */ if (IS_CHERRYVIEW(dev_priv)) @@ -7921,7 +7921,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) /* Finally allow us to boost to max by default */ dev_priv->rps.boost_freq = dev_priv->rps.max_freq; - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); mutex_unlock(&dev_priv->drm.struct_mutex); intel_autoenable_gt_powersave(dev_priv); @@ -7968,7 +7968,7 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) if (!READ_ONCE(dev_priv->rps.enabled)) return; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); if (INTEL_GEN(dev_priv) >= 9) { gen9_disable_rc6(dev_priv); @@ -7987,7 +7987,7 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) } dev_priv->rps.enabled = false; - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) @@ -8002,7 +8002,7 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) if (intel_vgpu_active(dev_priv)) return; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); if (IS_CHERRYVIEW(dev_priv)) { cherryview_enable_rc6(dev_priv); @@ -8035,7 +8035,7 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq); dev_priv->rps.enabled = true; - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } static void __intel_autoenable_gt_powersave(struct work_struct *work) @@ -9123,7 +9123,7 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val { int status; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); /* GEN6_PCODE_* are outside of the forcewake domain, we can * use te fw I915_READ variants to reduce the amount of work @@ -9170,7 +9170,7 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, { int status; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); /* GEN6_PCODE_* are outside of the forcewake domain, we can * use te fw I915_READ variants to reduce the amount of work @@ -9247,7 +9247,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, u32 status; int ret; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); #define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \ &status) @@ -9344,7 +9344,7 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val) void intel_pm_setup(struct drm_i915_private *dev_priv) { - mutex_init(&dev_priv->rps.hw_lock); + mutex_init(&dev_priv->pcu_lock); INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work, __intel_autoenable_gt_powersave); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 7348c16c4a96..8af286c63d3b 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -785,7 +785,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv, state = enable ? PUNIT_PWRGT_PWR_ON(power_well_id) : PUNIT_PWRGT_PWR_GATE(power_well_id); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); #define COND \ ((vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask) == state) @@ -806,7 +806,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv, #undef COND out: - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } static void vlv_power_well_enable(struct drm_i915_private *dev_priv, @@ -833,7 +833,7 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, mask = PUNIT_PWRGT_MASK(power_well_id); ctrl = PUNIT_PWRGT_PWR_ON(power_well_id); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask; /* @@ -852,7 +852,7 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL) & mask; WARN_ON(ctrl != state); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); return enabled; } @@ -1364,7 +1364,7 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, bool enabled; u32 state, ctrl; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); state = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe); /* @@ -1381,7 +1381,7 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSC_MASK(pipe); WARN_ON(ctrl << 16 != state); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); return enabled; } @@ -1396,7 +1396,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); #define COND \ ((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe)) == state) @@ -1417,7 +1417,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, #undef COND out: - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 7d971cb56116..75c872bb8cc9 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -81,7 +81,7 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr) { u32 val = 0; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); mutex_lock(&dev_priv->sb_lock); vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, @@ -95,7 +95,7 @@ int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val) { int err; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); mutex_lock(&dev_priv->sb_lock); err = vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, @@ -125,7 +125,7 @@ u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr) { u32 val = 0; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); mutex_lock(&dev_priv->sb_lock); vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_NC, From 562d9bae08a10335368bf54ea5cc7e4f6185bccc Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Tue, 10 Oct 2017 22:30:06 +0100 Subject: [PATCH 133/145] drm/i915: Name structure in dev_priv that contains RPS/RC6 state as "gt_pm" Prepared substructure rps for RPS related state. autoenable_work is used for RC6 too hence it is defined outside rps structure. As we do this lot many functions are refactored to use intel_rps *rps to access rps related members. Hence renamed intel_rps_client pointer variables to rps_client in various functions. v2: Rebase. v3: s/pm/gt_pm (Chris) Refactored access to rps structure by declaring struct intel_rps * in many functions. Signed-off-by: Sagar Arun Kamble Cc: Chris Wilson Cc: Imre Deak Cc: Joonas Lahtinen Reviewed-by: Radoslaw Szwichtenberg #1 Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1507360055-19948-9-git-send-email-sagar.a.kamble@intel.com Acked-by: Imre Deak Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171010213010.7415-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 99 +++---- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 14 +- drivers/gpu/drm/i915/i915_gem.c | 21 +- drivers/gpu/drm/i915/i915_gem_request.c | 2 +- drivers/gpu/drm/i915/i915_guc_submission.c | 10 +- drivers/gpu/drm/i915/i915_irq.c | 87 +++--- drivers/gpu/drm/i915/i915_sysfs.c | 54 ++-- drivers/gpu/drm/i915/intel_drv.h | 2 +- drivers/gpu/drm/i915/intel_pm.c | 315 ++++++++++++--------- 10 files changed, 330 insertions(+), 276 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e733097fa647..0bb6e01121fc 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1080,6 +1080,7 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops, static int i915_frequency_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct intel_rps *rps = &dev_priv->gt_pm.rps; int ret = 0; intel_runtime_pm_get(dev_priv); @@ -1116,20 +1117,20 @@ static int i915_frequency_info(struct seq_file *m, void *unused) intel_gpu_freq(dev_priv, (freq_sts >> 8) & 0xff)); seq_printf(m, "current GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq)); + intel_gpu_freq(dev_priv, rps->cur_freq)); seq_printf(m, "max GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); + intel_gpu_freq(dev_priv, rps->max_freq)); seq_printf(m, "min GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq)); + intel_gpu_freq(dev_priv, rps->min_freq)); seq_printf(m, "idle GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq)); + intel_gpu_freq(dev_priv, rps->idle_freq)); seq_printf(m, "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq)); + intel_gpu_freq(dev_priv, rps->efficient_freq)); mutex_unlock(&dev_priv->pcu_lock); } else if (INTEL_GEN(dev_priv) >= 6) { u32 rp_state_limits; @@ -1210,7 +1211,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "PM IER=0x%08x IMR=0x%08x ISR=0x%08x IIR=0x%08x, MASK=0x%08x\n", pm_ier, pm_imr, pm_isr, pm_iir, pm_mask); seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n", - dev_priv->rps.pm_intrmsk_mbz); + rps->pm_intrmsk_mbz); seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); seq_printf(m, "Render p-state ratio: %d\n", (gt_perf_status & (INTEL_GEN(dev_priv) >= 9 ? 0x1ff00 : 0xff00)) >> 8); @@ -1230,8 +1231,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) rpcurup, GT_PM_INTERVAL_TO_US(dev_priv, rpcurup)); seq_printf(m, "RP PREV UP: %d (%dus)\n", rpprevup, GT_PM_INTERVAL_TO_US(dev_priv, rpprevup)); - seq_printf(m, "Up threshold: %d%%\n", - dev_priv->rps.up_threshold); + seq_printf(m, "Up threshold: %d%%\n", rps->up_threshold); seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n", rpdownei, GT_PM_INTERVAL_TO_US(dev_priv, rpdownei)); @@ -1239,8 +1239,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) rpcurdown, GT_PM_INTERVAL_TO_US(dev_priv, rpcurdown)); seq_printf(m, "RP PREV DOWN: %d (%dus)\n", rpprevdown, GT_PM_INTERVAL_TO_US(dev_priv, rpprevdown)); - seq_printf(m, "Down threshold: %d%%\n", - dev_priv->rps.down_threshold); + seq_printf(m, "Down threshold: %d%%\n", rps->down_threshold); max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; @@ -1262,22 +1261,22 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); + intel_gpu_freq(dev_priv, rps->max_freq)); seq_printf(m, "Current freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq)); + intel_gpu_freq(dev_priv, rps->cur_freq)); seq_printf(m, "Actual freq: %d MHz\n", cagf); seq_printf(m, "Idle freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq)); + intel_gpu_freq(dev_priv, rps->idle_freq)); seq_printf(m, "Min freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq)); + intel_gpu_freq(dev_priv, rps->min_freq)); seq_printf(m, "Boost freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq)); + intel_gpu_freq(dev_priv, rps->boost_freq)); seq_printf(m, "Max freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); + intel_gpu_freq(dev_priv, rps->max_freq)); seq_printf(m, "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq)); + intel_gpu_freq(dev_priv, rps->efficient_freq)); } else { seq_puts(m, "no P-state info available\n"); } @@ -1831,6 +1830,7 @@ static int i915_emon_status(struct seq_file *m, void *unused) static int i915_ring_freq_table(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct intel_rps *rps = &dev_priv->gt_pm.rps; int ret = 0; int gpu_freq, ia_freq; unsigned int max_gpu_freq, min_gpu_freq; @@ -1848,13 +1848,11 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { /* Convert GT frequency to 50 HZ units */ - min_gpu_freq = - dev_priv->rps.min_freq_softlimit / GEN9_FREQ_SCALER; - max_gpu_freq = - dev_priv->rps.max_freq_softlimit / GEN9_FREQ_SCALER; + min_gpu_freq = rps->min_freq_softlimit / GEN9_FREQ_SCALER; + max_gpu_freq = rps->max_freq_softlimit / GEN9_FREQ_SCALER; } else { - min_gpu_freq = dev_priv->rps.min_freq_softlimit; - max_gpu_freq = dev_priv->rps.max_freq_softlimit; + min_gpu_freq = rps->min_freq_softlimit; + max_gpu_freq = rps->max_freq_softlimit; } seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n"); @@ -2307,25 +2305,26 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct drm_device *dev = &dev_priv->drm; + struct intel_rps *rps = &dev_priv->gt_pm.rps; struct drm_file *file; - seq_printf(m, "RPS enabled? %d\n", dev_priv->rps.enabled); + seq_printf(m, "RPS enabled? %d\n", rps->enabled); seq_printf(m, "GPU busy? %s [%d requests]\n", yesno(dev_priv->gt.awake), dev_priv->gt.active_requests); seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv)); seq_printf(m, "Boosts outstanding? %d\n", - atomic_read(&dev_priv->rps.num_waiters)); + atomic_read(&rps->num_waiters)); seq_printf(m, "Frequency requested %d\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq)); + intel_gpu_freq(dev_priv, rps->cur_freq)); seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n", - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit), - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit), - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); + intel_gpu_freq(dev_priv, rps->min_freq), + intel_gpu_freq(dev_priv, rps->min_freq_softlimit), + intel_gpu_freq(dev_priv, rps->max_freq_softlimit), + intel_gpu_freq(dev_priv, rps->max_freq)); seq_printf(m, " idle:%d, efficient:%d, boost:%d\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq), - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), - intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq)); + intel_gpu_freq(dev_priv, rps->idle_freq), + intel_gpu_freq(dev_priv, rps->efficient_freq), + intel_gpu_freq(dev_priv, rps->boost_freq)); mutex_lock(&dev->filelist_mutex); list_for_each_entry_reverse(file, &dev->filelist, lhead) { @@ -2337,15 +2336,15 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) seq_printf(m, "%s [%d]: %d boosts\n", task ? task->comm : "", task ? task->pid : -1, - atomic_read(&file_priv->rps.boosts)); + atomic_read(&file_priv->rps_client.boosts)); rcu_read_unlock(); } seq_printf(m, "Kernel (anonymous) boosts: %d\n", - atomic_read(&dev_priv->rps.boosts)); + atomic_read(&rps->boosts)); mutex_unlock(&dev->filelist_mutex); if (INTEL_GEN(dev_priv) >= 6 && - dev_priv->rps.enabled && + rps->enabled && dev_priv->gt.active_requests) { u32 rpup, rpupei; u32 rpdown, rpdownei; @@ -2358,13 +2357,13 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n", - rps_power_to_str(dev_priv->rps.power)); + rps_power_to_str(rps->power)); seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n", rpup && rpupei ? 100 * rpup / rpupei : 0, - dev_priv->rps.up_threshold); + rps->up_threshold); seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n", rpdown && rpdownei ? 100 * rpdown / rpdownei : 0, - dev_priv->rps.down_threshold); + rps->down_threshold); } else { seq_puts(m, "\nRPS Autotuning inactive\n"); } @@ -4304,7 +4303,7 @@ i915_max_freq_get(void *data, u64 *val) if (INTEL_GEN(dev_priv) < 6) return -ENODEV; - *val = intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit); + *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.max_freq_softlimit); return 0; } @@ -4312,6 +4311,7 @@ static int i915_max_freq_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 hw_max, hw_min; int ret; @@ -4329,15 +4329,15 @@ i915_max_freq_set(void *data, u64 val) */ val = intel_freq_opcode(dev_priv, val); - hw_max = dev_priv->rps.max_freq; - hw_min = dev_priv->rps.min_freq; + hw_max = rps->max_freq; + hw_min = rps->min_freq; - if (val < hw_min || val > hw_max || val < dev_priv->rps.min_freq_softlimit) { + if (val < hw_min || val > hw_max || val < rps->min_freq_softlimit) { mutex_unlock(&dev_priv->pcu_lock); return -EINVAL; } - dev_priv->rps.max_freq_softlimit = val; + rps->max_freq_softlimit = val; if (intel_set_rps(dev_priv, val)) DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); @@ -4359,7 +4359,7 @@ i915_min_freq_get(void *data, u64 *val) if (INTEL_GEN(dev_priv) < 6) return -ENODEV; - *val = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit); + *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.min_freq_softlimit); return 0; } @@ -4367,6 +4367,7 @@ static int i915_min_freq_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 hw_max, hw_min; int ret; @@ -4384,16 +4385,16 @@ i915_min_freq_set(void *data, u64 val) */ val = intel_freq_opcode(dev_priv, val); - hw_max = dev_priv->rps.max_freq; - hw_min = dev_priv->rps.min_freq; + hw_max = rps->max_freq; + hw_min = rps->min_freq; if (val < hw_min || - val > hw_max || val > dev_priv->rps.max_freq_softlimit) { + val > hw_max || val > rps->max_freq_softlimit) { mutex_unlock(&dev_priv->pcu_lock); return -EINVAL; } - dev_priv->rps.min_freq_softlimit = val; + rps->min_freq_softlimit = val; if (intel_set_rps(dev_priv, val)) DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 9ebbb08dcf2d..9b8a19149154 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2502,7 +2502,7 @@ static int intel_runtime_suspend(struct device *kdev) struct drm_i915_private *dev_priv = to_i915(dev); int ret; - if (WARN_ON_ONCE(!(dev_priv->rps.enabled && intel_enable_rc6()))) + if (WARN_ON_ONCE(!(dev_priv->gt_pm.rps.enabled && intel_enable_rc6()))) return -ENODEV; if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev_priv))) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fca7b939495f..521348ee7242 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -609,7 +609,7 @@ struct drm_i915_file_private { struct intel_rps_client { atomic_t boosts; - } rps; + } rps_client; unsigned int bsd_engine; @@ -1317,7 +1317,7 @@ struct intel_rps_ei { u32 media_c0; }; -struct intel_gen6_power_mgmt { +struct intel_rps { /* * work, interrupts_enabled and pm_iir are protected by * dev_priv->irq_lock @@ -1358,7 +1358,6 @@ struct intel_gen6_power_mgmt { enum { LOW_POWER, BETWEEN, HIGH_POWER } power; bool enabled; - struct delayed_work autoenable_work; atomic_t num_waiters; atomic_t boosts; @@ -1366,6 +1365,11 @@ struct intel_gen6_power_mgmt { struct intel_rps_ei ei; }; +struct intel_gen6_power_mgmt { + struct intel_rps rps; + struct delayed_work autoenable_work; +}; + /* defined intel_pm.c */ extern spinlock_t mchdev_lock; @@ -2421,8 +2425,8 @@ struct drm_i915_private { */ struct mutex pcu_lock; - /* gen6+ rps state */ - struct intel_gen6_power_mgmt rps; + /* gen6+ GT PM state */ + struct intel_gen6_power_mgmt gt_pm; /* ilk-only ips/rps state. Everything in here is protected by the global * mchdev_lock in intel_pm.c */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e829e8c900e8..f76890b74d00 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -358,7 +358,7 @@ static long i915_gem_object_wait_fence(struct dma_fence *fence, unsigned int flags, long timeout, - struct intel_rps_client *rps) + struct intel_rps_client *rps_client) { struct drm_i915_gem_request *rq; @@ -391,11 +391,11 @@ i915_gem_object_wait_fence(struct dma_fence *fence, * forcing the clocks too high for the whole system, we only allow * each client to waitboost once in a busy period. */ - if (rps) { + if (rps_client) { if (INTEL_GEN(rq->i915) >= 6) - gen6_rps_boost(rq, rps); + gen6_rps_boost(rq, rps_client); else - rps = NULL; + rps_client = NULL; } timeout = i915_wait_request(rq, flags, timeout); @@ -411,7 +411,7 @@ static long i915_gem_object_wait_reservation(struct reservation_object *resv, unsigned int flags, long timeout, - struct intel_rps_client *rps) + struct intel_rps_client *rps_client) { unsigned int seq = __read_seqcount_begin(&resv->seq); struct dma_fence *excl; @@ -430,7 +430,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, for (i = 0; i < count; i++) { timeout = i915_gem_object_wait_fence(shared[i], flags, timeout, - rps); + rps_client); if (timeout < 0) break; @@ -447,7 +447,8 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, } if (excl && timeout >= 0) { - timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps); + timeout = i915_gem_object_wait_fence(excl, flags, timeout, + rps_client); prune_fences = timeout >= 0; } @@ -543,7 +544,7 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj, unsigned int flags, long timeout, - struct intel_rps_client *rps) + struct intel_rps_client *rps_client) { might_sleep(); #if IS_ENABLED(CONFIG_LOCKDEP) @@ -555,7 +556,7 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj, timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout, - rps); + rps_client); return timeout < 0 ? timeout : 0; } @@ -563,7 +564,7 @@ static struct intel_rps_client *to_rps_client(struct drm_file *file) { struct drm_i915_file_private *fpriv = file->driver_priv; - return &fpriv->rps; + return &fpriv->rps_client; } static int diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index b100b38f1dd2..d5f4023e5d63 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -416,7 +416,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) spin_lock_irq(&request->lock); if (request->waitboost) - atomic_dec(&request->i915->rps.num_waiters); + atomic_dec(&request->i915->gt_pm.rps.num_waiters); dma_fence_signal_locked(&request->fence); spin_unlock_irq(&request->lock); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index f15de4dcefde..a2e8114b739d 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -1028,6 +1028,7 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv) static void guc_interrupts_capture(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; struct intel_engine_cs *engine; enum intel_engine_id id; int irqs; @@ -1064,12 +1065,13 @@ static void guc_interrupts_capture(struct drm_i915_private *dev_priv) * Here we CLEAR REDIRECT_TO_GUC bit in pm_intrmsk_mbz, which will * result in the register bit being left SET! */ - dev_priv->rps.pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; - dev_priv->rps.pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; + rps->pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; } static void guc_interrupts_release(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; struct intel_engine_cs *engine; enum intel_engine_id id; int irqs; @@ -1088,8 +1090,8 @@ static void guc_interrupts_release(struct drm_i915_private *dev_priv) I915_WRITE(GUC_VCS2_VCS1_IER, 0); I915_WRITE(GUC_WD_VECS_IER, 0); - dev_priv->rps.pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; - dev_priv->rps.pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; + rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + rps->pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; } int i915_guc_submission_enable(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 1844d3fe8f1f..b1296a55c1e4 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -404,19 +404,21 @@ void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv) { spin_lock_irq(&dev_priv->irq_lock); gen6_reset_pm_iir(dev_priv, dev_priv->pm_rps_events); - dev_priv->rps.pm_iir = 0; + dev_priv->gt_pm.rps.pm_iir = 0; spin_unlock_irq(&dev_priv->irq_lock); } void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) { - if (READ_ONCE(dev_priv->rps.interrupts_enabled)) + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + if (READ_ONCE(rps->interrupts_enabled)) return; spin_lock_irq(&dev_priv->irq_lock); - WARN_ON_ONCE(dev_priv->rps.pm_iir); + WARN_ON_ONCE(rps->pm_iir); WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); - dev_priv->rps.interrupts_enabled = true; + rps->interrupts_enabled = true; gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); spin_unlock_irq(&dev_priv->irq_lock); @@ -424,11 +426,13 @@ void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) { - if (!READ_ONCE(dev_priv->rps.interrupts_enabled)) + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + if (!READ_ONCE(rps->interrupts_enabled)) return; spin_lock_irq(&dev_priv->irq_lock); - dev_priv->rps.interrupts_enabled = false; + rps->interrupts_enabled = false; I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u)); @@ -442,7 +446,7 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) * we will reset the GPU to minimum frequencies, so the current * state of the worker can be discarded. */ - cancel_work_sync(&dev_priv->rps.work); + cancel_work_sync(&rps->work); gen6_reset_rps_interrupts(dev_priv); } @@ -1119,12 +1123,13 @@ static void vlv_c0_read(struct drm_i915_private *dev_priv, void gen6_rps_reset_ei(struct drm_i915_private *dev_priv) { - memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei)); + memset(&dev_priv->gt_pm.rps.ei, 0, sizeof(dev_priv->gt_pm.rps.ei)); } static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) { - const struct intel_rps_ei *prev = &dev_priv->rps.ei; + struct intel_rps *rps = &dev_priv->gt_pm.rps; + const struct intel_rps_ei *prev = &rps->ei; struct intel_rps_ei now; u32 events = 0; @@ -1151,28 +1156,29 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) c0 = max(render, media); c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ - if (c0 > time * dev_priv->rps.up_threshold) + if (c0 > time * rps->up_threshold) events = GEN6_PM_RP_UP_THRESHOLD; - else if (c0 < time * dev_priv->rps.down_threshold) + else if (c0 < time * rps->down_threshold) events = GEN6_PM_RP_DOWN_THRESHOLD; } - dev_priv->rps.ei = now; + rps->ei = now; return events; } static void gen6_pm_rps_work(struct work_struct *work) { struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, rps.work); + container_of(work, struct drm_i915_private, gt_pm.rps.work); + struct intel_rps *rps = &dev_priv->gt_pm.rps; bool client_boost = false; int new_delay, adj, min, max; u32 pm_iir = 0; spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->rps.interrupts_enabled) { - pm_iir = fetch_and_zero(&dev_priv->rps.pm_iir); - client_boost = atomic_read(&dev_priv->rps.num_waiters); + if (rps->interrupts_enabled) { + pm_iir = fetch_and_zero(&rps->pm_iir); + client_boost = atomic_read(&rps->num_waiters); } spin_unlock_irq(&dev_priv->irq_lock); @@ -1185,14 +1191,14 @@ static void gen6_pm_rps_work(struct work_struct *work) pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir); - adj = dev_priv->rps.last_adj; - new_delay = dev_priv->rps.cur_freq; - min = dev_priv->rps.min_freq_softlimit; - max = dev_priv->rps.max_freq_softlimit; + adj = rps->last_adj; + new_delay = rps->cur_freq; + min = rps->min_freq_softlimit; + max = rps->max_freq_softlimit; if (client_boost) - max = dev_priv->rps.max_freq; - if (client_boost && new_delay < dev_priv->rps.boost_freq) { - new_delay = dev_priv->rps.boost_freq; + max = rps->max_freq; + if (client_boost && new_delay < rps->boost_freq) { + new_delay = rps->boost_freq; adj = 0; } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { if (adj > 0) @@ -1200,15 +1206,15 @@ static void gen6_pm_rps_work(struct work_struct *work) else /* CHV needs even encode values */ adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1; - if (new_delay >= dev_priv->rps.max_freq_softlimit) + if (new_delay >= rps->max_freq_softlimit) adj = 0; } else if (client_boost) { adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { - if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq) - new_delay = dev_priv->rps.efficient_freq; - else if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit) - new_delay = dev_priv->rps.min_freq_softlimit; + if (rps->cur_freq > rps->efficient_freq) + new_delay = rps->efficient_freq; + else if (rps->cur_freq > rps->min_freq_softlimit) + new_delay = rps->min_freq_softlimit; adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { if (adj < 0) @@ -1216,13 +1222,13 @@ static void gen6_pm_rps_work(struct work_struct *work) else /* CHV needs even encode values */ adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1; - if (new_delay <= dev_priv->rps.min_freq_softlimit) + if (new_delay <= rps->min_freq_softlimit) adj = 0; } else { /* unknown event */ adj = 0; } - dev_priv->rps.last_adj = adj; + rps->last_adj = adj; /* sysfs frequency interfaces may have snuck in while servicing the * interrupt @@ -1232,7 +1238,7 @@ static void gen6_pm_rps_work(struct work_struct *work) if (intel_set_rps(dev_priv, new_delay)) { DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); - dev_priv->rps.last_adj = 0; + rps->last_adj = 0; } mutex_unlock(&dev_priv->pcu_lock); @@ -1240,7 +1246,7 @@ static void gen6_pm_rps_work(struct work_struct *work) out: /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */ spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->rps.interrupts_enabled) + if (rps->interrupts_enabled) gen6_unmask_pm_irq(dev_priv, dev_priv->pm_rps_events); spin_unlock_irq(&dev_priv->irq_lock); } @@ -1721,12 +1727,14 @@ static void i9xx_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, * the work queue. */ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + if (pm_iir & dev_priv->pm_rps_events) { spin_lock(&dev_priv->irq_lock); gen6_mask_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events); - if (dev_priv->rps.interrupts_enabled) { - dev_priv->rps.pm_iir |= pm_iir & dev_priv->pm_rps_events; - schedule_work(&dev_priv->rps.work); + if (rps->interrupts_enabled) { + rps->pm_iir |= pm_iir & dev_priv->pm_rps_events; + schedule_work(&rps->work); } spin_unlock(&dev_priv->irq_lock); } @@ -4007,11 +4015,12 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) void intel_irq_init(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; + struct intel_rps *rps = &dev_priv->gt_pm.rps; int i; intel_hpd_init_work(dev_priv); - INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work); + INIT_WORK(&rps->work, gen6_pm_rps_work); INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); for (i = 0; i < MAX_L3_SLICES; ++i) @@ -4027,7 +4036,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv) else dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS; - dev_priv->rps.pm_intrmsk_mbz = 0; + rps->pm_intrmsk_mbz = 0; /* * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer @@ -4036,10 +4045,10 @@ void intel_irq_init(struct drm_i915_private *dev_priv) * TODO: verify if this can be reproduced on VLV,CHV. */ if (INTEL_GEN(dev_priv) <= 7) - dev_priv->rps.pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; + rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; if (INTEL_GEN(dev_priv) >= 8) - dev_priv->rps.pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; if (IS_GEN2(dev_priv)) { /* Gen2 doesn't have a hardware frame counter */ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 79fbab49d1d0..0a57f9867f7f 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -275,7 +275,7 @@ static ssize_t gt_cur_freq_mhz_show(struct device *kdev, return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, - dev_priv->rps.cur_freq)); + dev_priv->gt_pm.rps.cur_freq)); } static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -284,7 +284,7 @@ static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribu return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, - dev_priv->rps.boost_freq)); + dev_priv->gt_pm.rps.boost_freq)); } static ssize_t gt_boost_freq_mhz_store(struct device *kdev, @@ -292,6 +292,7 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; ssize_t ret; @@ -301,11 +302,11 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, /* Validate against (static) hardware limits */ val = intel_freq_opcode(dev_priv, val); - if (val < dev_priv->rps.min_freq || val > dev_priv->rps.max_freq) + if (val < rps->min_freq || val > rps->max_freq) return -EINVAL; mutex_lock(&dev_priv->pcu_lock); - dev_priv->rps.boost_freq = val; + rps->boost_freq = val; mutex_unlock(&dev_priv->pcu_lock); return count; @@ -318,7 +319,7 @@ static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, - dev_priv->rps.efficient_freq)); + dev_priv->gt_pm.rps.efficient_freq)); } static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -327,7 +328,7 @@ static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, - dev_priv->rps.max_freq_softlimit)); + dev_priv->gt_pm.rps.max_freq_softlimit)); } static ssize_t gt_max_freq_mhz_store(struct device *kdev, @@ -335,6 +336,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; ssize_t ret; @@ -348,23 +350,23 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, val = intel_freq_opcode(dev_priv, val); - if (val < dev_priv->rps.min_freq || - val > dev_priv->rps.max_freq || - val < dev_priv->rps.min_freq_softlimit) { + if (val < rps->min_freq || + val > rps->max_freq || + val < rps->min_freq_softlimit) { mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); return -EINVAL; } - if (val > dev_priv->rps.rp0_freq) + if (val > rps->rp0_freq) DRM_DEBUG("User requested overclocking to %d\n", intel_gpu_freq(dev_priv, val)); - dev_priv->rps.max_freq_softlimit = val; + rps->max_freq_softlimit = val; - val = clamp_t(int, dev_priv->rps.cur_freq, - dev_priv->rps.min_freq_softlimit, - dev_priv->rps.max_freq_softlimit); + val = clamp_t(int, rps->cur_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit); /* We still need *_set_rps to process the new max_delay and * update the interrupt limits and PMINTRMSK even though @@ -384,7 +386,7 @@ static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, - dev_priv->rps.min_freq_softlimit)); + dev_priv->gt_pm.rps.min_freq_softlimit)); } static ssize_t gt_min_freq_mhz_store(struct device *kdev, @@ -392,6 +394,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; ssize_t ret; @@ -405,19 +408,19 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, val = intel_freq_opcode(dev_priv, val); - if (val < dev_priv->rps.min_freq || - val > dev_priv->rps.max_freq || - val > dev_priv->rps.max_freq_softlimit) { + if (val < rps->min_freq || + val > rps->max_freq || + val > rps->max_freq_softlimit) { mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); return -EINVAL; } - dev_priv->rps.min_freq_softlimit = val; + rps->min_freq_softlimit = val; - val = clamp_t(int, dev_priv->rps.cur_freq, - dev_priv->rps.min_freq_softlimit, - dev_priv->rps.max_freq_softlimit); + val = clamp_t(int, rps->cur_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit); /* We still need *_set_rps to process the new min_delay and * update the interrupt limits and PMINTRMSK even though @@ -448,14 +451,15 @@ static DEVICE_ATTR(gt_RPn_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL); static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; if (attr == &dev_attr_gt_RP0_freq_mhz) - val = intel_gpu_freq(dev_priv, dev_priv->rps.rp0_freq); + val = intel_gpu_freq(dev_priv, rps->rp0_freq); else if (attr == &dev_attr_gt_RP1_freq_mhz) - val = intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq); + val = intel_gpu_freq(dev_priv, rps->rp1_freq); else if (attr == &dev_attr_gt_RPn_freq_mhz) - val = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq); + val = intel_gpu_freq(dev_priv, rps->min_freq); else BUG(); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 3fd428b99c37..53acfc475e35 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1243,7 +1243,7 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv); static inline u32 gen6_sanitize_rps_pm_mask(const struct drm_i915_private *i915, u32 mask) { - return mask & ~i915->rps.pm_intrmsk_mbz; + return mask & ~i915->gt_pm.rps.pm_intrmsk_mbz; } void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 512f2b0513e0..9097489e1993 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5988,6 +5988,7 @@ static void ironlake_disable_drps(struct drm_i915_private *dev_priv) */ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 limits; /* Only set the down limit when we've reached the lowest level to avoid @@ -5997,13 +5998,13 @@ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) * frequency, if the down threshold expires in that window we will not * receive a down interrupt. */ if (INTEL_GEN(dev_priv) >= 9) { - limits = (dev_priv->rps.max_freq_softlimit) << 23; - if (val <= dev_priv->rps.min_freq_softlimit) - limits |= (dev_priv->rps.min_freq_softlimit) << 14; + limits = (rps->max_freq_softlimit) << 23; + if (val <= rps->min_freq_softlimit) + limits |= (rps->min_freq_softlimit) << 14; } else { - limits = dev_priv->rps.max_freq_softlimit << 24; - if (val <= dev_priv->rps.min_freq_softlimit) - limits |= dev_priv->rps.min_freq_softlimit << 16; + limits = rps->max_freq_softlimit << 24; + if (val <= rps->min_freq_softlimit) + limits |= rps->min_freq_softlimit << 16; } return limits; @@ -6011,39 +6012,40 @@ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; int new_power; u32 threshold_up = 0, threshold_down = 0; /* in % */ u32 ei_up = 0, ei_down = 0; - new_power = dev_priv->rps.power; - switch (dev_priv->rps.power) { + new_power = rps->power; + switch (rps->power) { case LOW_POWER: - if (val > dev_priv->rps.efficient_freq + 1 && - val > dev_priv->rps.cur_freq) + if (val > rps->efficient_freq + 1 && + val > rps->cur_freq) new_power = BETWEEN; break; case BETWEEN: - if (val <= dev_priv->rps.efficient_freq && - val < dev_priv->rps.cur_freq) + if (val <= rps->efficient_freq && + val < rps->cur_freq) new_power = LOW_POWER; - else if (val >= dev_priv->rps.rp0_freq && - val > dev_priv->rps.cur_freq) + else if (val >= rps->rp0_freq && + val > rps->cur_freq) new_power = HIGH_POWER; break; case HIGH_POWER: - if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && - val < dev_priv->rps.cur_freq) + if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && + val < rps->cur_freq) new_power = BETWEEN; break; } /* Max/min bins are special */ - if (val <= dev_priv->rps.min_freq_softlimit) + if (val <= rps->min_freq_softlimit) new_power = LOW_POWER; - if (val >= dev_priv->rps.max_freq_softlimit) + if (val >= rps->max_freq_softlimit) new_power = HIGH_POWER; - if (new_power == dev_priv->rps.power) + if (new_power == rps->power) return; /* Note the units here are not exactly 1us, but 1280ns. */ @@ -6106,20 +6108,21 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) GEN6_RP_DOWN_IDLE_AVG); skip_hw_write: - dev_priv->rps.power = new_power; - dev_priv->rps.up_threshold = threshold_up; - dev_priv->rps.down_threshold = threshold_down; - dev_priv->rps.last_adj = 0; + rps->power = new_power; + rps->up_threshold = threshold_up; + rps->down_threshold = threshold_down; + rps->last_adj = 0; } static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 mask = 0; /* We use UP_EI_EXPIRED interupts for both up/down in manual mode */ - if (val > dev_priv->rps.min_freq_softlimit) + if (val > rps->min_freq_softlimit) mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; - if (val < dev_priv->rps.max_freq_softlimit) + if (val < rps->max_freq_softlimit) mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; mask &= dev_priv->pm_rps_events; @@ -6132,10 +6135,12 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* min/max delay may still have been modified so be sure to * write the limits value. */ - if (val != dev_priv->rps.cur_freq) { + if (val != rps->cur_freq) { gen6_set_rps_thresholds(dev_priv, val); if (INTEL_GEN(dev_priv) >= 9) @@ -6157,7 +6162,7 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val)); I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - dev_priv->rps.cur_freq = val; + rps->cur_freq = val; trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); return 0; @@ -6173,7 +6178,7 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - if (val != dev_priv->rps.cur_freq) { + if (val != dev_priv->gt_pm.rps.cur_freq) { err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); if (err) return err; @@ -6181,7 +6186,7 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) gen6_set_rps_thresholds(dev_priv, val); } - dev_priv->rps.cur_freq = val; + dev_priv->gt_pm.rps.cur_freq = val; trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); return 0; @@ -6196,10 +6201,11 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) */ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) { - u32 val = dev_priv->rps.idle_freq; + struct intel_rps *rps = &dev_priv->gt_pm.rps; + u32 val = rps->idle_freq; int err; - if (dev_priv->rps.cur_freq <= val) + if (rps->cur_freq <= val) return; /* The punit delays the write of the frequency and voltage until it @@ -6224,27 +6230,29 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) void gen6_rps_busy(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + mutex_lock(&dev_priv->pcu_lock); - if (dev_priv->rps.enabled) { + if (rps->enabled) { u8 freq; if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED) gen6_rps_reset_ei(dev_priv); I915_WRITE(GEN6_PMINTRMSK, - gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); + gen6_rps_pm_mask(dev_priv, rps->cur_freq)); gen6_enable_rps_interrupts(dev_priv); /* Use the user's desired frequency as a guide, but for better * performance, jump directly to RPe as our starting frequency. */ - freq = max(dev_priv->rps.cur_freq, - dev_priv->rps.efficient_freq); + freq = max(rps->cur_freq, + rps->efficient_freq); if (intel_set_rps(dev_priv, clamp(freq, - dev_priv->rps.min_freq_softlimit, - dev_priv->rps.max_freq_softlimit))) + rps->min_freq_softlimit, + rps->max_freq_softlimit))) DRM_DEBUG_DRIVER("Failed to set idle frequency\n"); } mutex_unlock(&dev_priv->pcu_lock); @@ -6252,6 +6260,8 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) void gen6_rps_idle(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* Flush our bottom-half so that it does not race with us * setting the idle frequency and so that it is bounded by * our rpm wakeref. And then disable the interrupts to stop any @@ -6260,12 +6270,12 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) gen6_disable_rps_interrupts(dev_priv); mutex_lock(&dev_priv->pcu_lock); - if (dev_priv->rps.enabled) { + if (rps->enabled) { if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_set_rps_idle(dev_priv); else - gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); - dev_priv->rps.last_adj = 0; + gen6_set_rps(dev_priv, rps->idle_freq); + rps->last_adj = 0; I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0)); } @@ -6273,22 +6283,22 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) } void gen6_rps_boost(struct drm_i915_gem_request *rq, - struct intel_rps_client *rps) + struct intel_rps_client *rps_client) { - struct drm_i915_private *i915 = rq->i915; + struct intel_rps *rps = &rq->i915->gt_pm.rps; unsigned long flags; bool boost; /* This is intentionally racy! We peek at the state here, then * validate inside the RPS worker. */ - if (!i915->rps.enabled) + if (!rps->enabled) return; boost = false; spin_lock_irqsave(&rq->lock, flags); if (!rq->waitboost && !i915_gem_request_completed(rq)) { - atomic_inc(&i915->rps.num_waiters); + atomic_inc(&rps->num_waiters); rq->waitboost = true; boost = true; } @@ -6296,22 +6306,23 @@ void gen6_rps_boost(struct drm_i915_gem_request *rq, if (!boost) return; - if (READ_ONCE(i915->rps.cur_freq) < i915->rps.boost_freq) - schedule_work(&i915->rps.work); + if (READ_ONCE(rps->cur_freq) < rps->boost_freq) + schedule_work(&rps->work); - atomic_inc(rps ? &rps->boosts : &i915->rps.boosts); + atomic_inc(rps_client ? &rps_client->boosts : &rps->boosts); } int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; int err; lockdep_assert_held(&dev_priv->pcu_lock); - GEM_BUG_ON(val > dev_priv->rps.max_freq); - GEM_BUG_ON(val < dev_priv->rps.min_freq); + GEM_BUG_ON(val > rps->max_freq); + GEM_BUG_ON(val < rps->min_freq); - if (!dev_priv->rps.enabled) { - dev_priv->rps.cur_freq = val; + if (!rps->enabled) { + rps->cur_freq = val; return 0; } @@ -6493,24 +6504,26 @@ int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6) static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* All of these values are in units of 50MHz */ /* static values from HW: RP0 > RP1 > RPn (min_freq) */ if (IS_GEN9_LP(dev_priv)) { u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP); - dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff; - dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; - dev_priv->rps.min_freq = (rp_state_cap >> 0) & 0xff; + rps->rp0_freq = (rp_state_cap >> 16) & 0xff; + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; + rps->min_freq = (rp_state_cap >> 0) & 0xff; } else { u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); - dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff; - dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; - dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff; + rps->rp0_freq = (rp_state_cap >> 0) & 0xff; + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; + rps->min_freq = (rp_state_cap >> 16) & 0xff; } /* hw_max = RP0 until we check for overclocking */ - dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; + rps->max_freq = rps->rp0_freq; - dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; + rps->efficient_freq = rps->rp1_freq; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { u32 ddcc_status = 0; @@ -6518,33 +6531,34 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) if (sandybridge_pcode_read(dev_priv, HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, &ddcc_status) == 0) - dev_priv->rps.efficient_freq = + rps->efficient_freq = clamp_t(u8, ((ddcc_status >> 8) & 0xff), - dev_priv->rps.min_freq, - dev_priv->rps.max_freq); + rps->min_freq, + rps->max_freq); } if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { /* Store the frequency values in 16.66 MHZ units, which is * the natural hardware unit for SKL */ - dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER; - dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER; - dev_priv->rps.min_freq *= GEN9_FREQ_SCALER; - dev_priv->rps.max_freq *= GEN9_FREQ_SCALER; - dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER; + rps->rp0_freq *= GEN9_FREQ_SCALER; + rps->rp1_freq *= GEN9_FREQ_SCALER; + rps->min_freq *= GEN9_FREQ_SCALER; + rps->max_freq *= GEN9_FREQ_SCALER; + rps->efficient_freq *= GEN9_FREQ_SCALER; } } static void reset_rps(struct drm_i915_private *dev_priv, int (*set)(struct drm_i915_private *, u8)) { - u8 freq = dev_priv->rps.cur_freq; + struct intel_rps *rps = &dev_priv->gt_pm.rps; + u8 freq = rps->cur_freq; /* force a reset */ - dev_priv->rps.power = -1; - dev_priv->rps.cur_freq = -1; + rps->power = -1; + rps->cur_freq = -1; if (set(dev_priv, freq)) DRM_ERROR("Failed to reset RPS to initial values\n"); @@ -6557,7 +6571,7 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv) /* Program defaults and thresholds for RPS*/ I915_WRITE(GEN6_RC_VIDEO_FREQ, - GEN9_FREQUENCY(dev_priv->rps.rp1_freq)); + GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq)); /* 1 second timeout*/ I915_WRITE(GEN6_RP_DOWN_TIMEOUT, @@ -6670,20 +6684,22 @@ static void gen8_enable_rc6(struct drm_i915_private *dev_priv) static void gen8_enable_rps(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); /* 1 Program defaults and thresholds for RPS*/ I915_WRITE(GEN6_RPNSWREQ, - HSW_FREQUENCY(dev_priv->rps.rp1_freq)); + HSW_FREQUENCY(rps->rp1_freq)); I915_WRITE(GEN6_RC_VIDEO_FREQ, - HSW_FREQUENCY(dev_priv->rps.rp1_freq)); + HSW_FREQUENCY(rps->rp1_freq)); /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ /* Docs recommend 900MHz, and 300 MHz respectively */ I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, - dev_priv->rps.max_freq_softlimit << 24 | - dev_priv->rps.min_freq_softlimit << 16); + rps->max_freq_softlimit << 24 | + rps->min_freq_softlimit << 16); I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/ @@ -6810,6 +6826,7 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; int min_freq = 15; unsigned int gpu_freq; unsigned int max_ia_freq, min_ring_freq; @@ -6840,11 +6857,11 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { /* Convert GT frequency to 50 HZ units */ - min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER; - max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER; + min_gpu_freq = rps->min_freq / GEN9_FREQ_SCALER; + max_gpu_freq = rps->max_freq / GEN9_FREQ_SCALER; } else { - min_gpu_freq = dev_priv->rps.min_freq; - max_gpu_freq = dev_priv->rps.max_freq; + min_gpu_freq = rps->min_freq; + max_gpu_freq = rps->max_freq; } /* @@ -7095,17 +7112,18 @@ static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv) static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv) { - dev_priv->rps.gpll_ref_freq = + dev_priv->gt_pm.rps.gpll_ref_freq = vlv_get_cck_clock(dev_priv, "GPLL ref", CCK_GPLL_CLOCK_CONTROL, dev_priv->czclk_freq); DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", - dev_priv->rps.gpll_ref_freq); + dev_priv->gt_pm.rps.gpll_ref_freq); } static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; valleyview_setup_pctx(dev_priv); @@ -7127,30 +7145,31 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) } DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); - dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv); - dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; + rps->max_freq = valleyview_rps_max_freq(dev_priv); + rps->rp0_freq = rps->max_freq; DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq), - dev_priv->rps.max_freq); + intel_gpu_freq(dev_priv, rps->max_freq), + rps->max_freq); - dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv); + rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv); DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), - dev_priv->rps.efficient_freq); + intel_gpu_freq(dev_priv, rps->efficient_freq), + rps->efficient_freq); - dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv); + rps->rp1_freq = valleyview_rps_guar_freq(dev_priv); DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), - dev_priv->rps.rp1_freq); + intel_gpu_freq(dev_priv, rps->rp1_freq), + rps->rp1_freq); - dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv); + rps->min_freq = valleyview_rps_min_freq(dev_priv); DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), - dev_priv->rps.min_freq); + intel_gpu_freq(dev_priv, rps->min_freq), + rps->min_freq); } static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; cherryview_setup_pctx(dev_priv); @@ -7171,31 +7190,29 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) } DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); - dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv); - dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; + rps->max_freq = cherryview_rps_max_freq(dev_priv); + rps->rp0_freq = rps->max_freq; DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq), - dev_priv->rps.max_freq); + intel_gpu_freq(dev_priv, rps->max_freq), + rps->max_freq); - dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv); + rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv); DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), - dev_priv->rps.efficient_freq); + intel_gpu_freq(dev_priv, rps->efficient_freq), + rps->efficient_freq); - dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv); + rps->rp1_freq = cherryview_rps_guar_freq(dev_priv); DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), - dev_priv->rps.rp1_freq); + intel_gpu_freq(dev_priv, rps->rp1_freq), + rps->rp1_freq); - dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv); + rps->min_freq = cherryview_rps_min_freq(dev_priv); DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), - dev_priv->rps.min_freq); + intel_gpu_freq(dev_priv, rps->min_freq), + rps->min_freq); - WARN_ONCE((dev_priv->rps.max_freq | - dev_priv->rps.efficient_freq | - dev_priv->rps.rp1_freq | - dev_priv->rps.min_freq) & 1, + WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq | + rps->min_freq) & 1, "Odd GPU freq values\n"); } @@ -7584,7 +7601,7 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) lockdep_assert_held(&mchdev_lock); - pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq)); + pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq)); pxvid = (pxvid >> 24) & 0x7f; ext_v = pvid_to_extvid(dev_priv, pxvid); @@ -7871,6 +7888,8 @@ static void intel_init_emon(struct drm_i915_private *dev_priv) void intel_init_gt_powersave(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* * RPM depends on RC6 to save restore the GT HW context, so make RC6 a * requirement. @@ -7892,16 +7911,16 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) gen6_init_rps_frequencies(dev_priv); /* Derive initial user preferences/limits from the hardware limits */ - dev_priv->rps.idle_freq = dev_priv->rps.min_freq; - dev_priv->rps.cur_freq = dev_priv->rps.idle_freq; + rps->idle_freq = rps->min_freq; + rps->cur_freq = rps->idle_freq; - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; - dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; + rps->max_freq_softlimit = rps->max_freq; + rps->min_freq_softlimit = rps->min_freq; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - dev_priv->rps.min_freq_softlimit = + rps->min_freq_softlimit = max_t(int, - dev_priv->rps.efficient_freq, + rps->efficient_freq, intel_freq_opcode(dev_priv, 450)); /* After setting max-softlimit, find the overclock max freq */ @@ -7912,14 +7931,14 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, ¶ms); if (params & BIT(31)) { /* OC supported */ DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", - (dev_priv->rps.max_freq & 0xff) * 50, + (rps->max_freq & 0xff) * 50, (params & 0xff) * 50); - dev_priv->rps.max_freq = params & 0xff; + rps->max_freq = params & 0xff; } } /* Finally allow us to boost to max by default */ - dev_priv->rps.boost_freq = dev_priv->rps.max_freq; + rps->boost_freq = rps->max_freq; mutex_unlock(&dev_priv->pcu_lock); mutex_unlock(&dev_priv->drm.struct_mutex); @@ -7949,7 +7968,7 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) < 6) return; - if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work)) + if (cancel_delayed_work_sync(&dev_priv->gt_pm.autoenable_work)) intel_runtime_pm_put(dev_priv); /* gen6_rps_idle() will be called later to disable interrupts */ @@ -7957,7 +7976,7 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv) void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) { - dev_priv->rps.enabled = true; /* force disabling */ + dev_priv->gt_pm.rps.enabled = true; /* force disabling */ intel_disable_gt_powersave(dev_priv); gen6_reset_rps_interrupts(dev_priv); @@ -7965,7 +7984,9 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) { - if (!READ_ONCE(dev_priv->rps.enabled)) + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + if (!READ_ONCE(rps->enabled)) return; mutex_lock(&dev_priv->pcu_lock); @@ -7986,16 +8007,18 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) ironlake_disable_drps(dev_priv); } - dev_priv->rps.enabled = false; + rps->enabled = false; mutex_unlock(&dev_priv->pcu_lock); } void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* We shouldn't be disabling as we submit, so this should be less * racy than it appears! */ - if (READ_ONCE(dev_priv->rps.enabled)) + if (READ_ONCE(rps->enabled)) return; /* Powersaving is controlled by the host when inside a VM */ @@ -8028,24 +8051,26 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) intel_init_emon(dev_priv); } - WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq); - WARN_ON(dev_priv->rps.idle_freq > dev_priv->rps.max_freq); + WARN_ON(rps->max_freq < rps->min_freq); + WARN_ON(rps->idle_freq > rps->max_freq); - WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq); - WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq); + WARN_ON(rps->efficient_freq < rps->min_freq); + WARN_ON(rps->efficient_freq > rps->max_freq); - dev_priv->rps.enabled = true; + rps->enabled = true; mutex_unlock(&dev_priv->pcu_lock); } static void __intel_autoenable_gt_powersave(struct work_struct *work) { struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), rps.autoenable_work.work); + container_of(work, + typeof(*dev_priv), + gt_pm.autoenable_work.work); struct intel_engine_cs *rcs; struct drm_i915_gem_request *req; - if (READ_ONCE(dev_priv->rps.enabled)) + if (READ_ONCE(dev_priv->gt_pm.rps.enabled)) goto out; rcs = dev_priv->engine[RCS]; @@ -8075,7 +8100,7 @@ out: void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv) { - if (READ_ONCE(dev_priv->rps.enabled)) + if (READ_ONCE(dev_priv->gt_pm.rps.enabled)) return; if (IS_IRONLAKE_M(dev_priv)) { @@ -8095,7 +8120,7 @@ void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv) * runtime resume it's necessary). */ if (queue_delayed_work(dev_priv->wq, - &dev_priv->rps.autoenable_work, + &dev_priv->gt_pm.autoenable_work, round_jiffies_up_relative(HZ))) intel_runtime_pm_get_noresume(dev_priv); } @@ -9289,31 +9314,39 @@ out: static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* * N = val - 0xb7 * Slow = Fast = GPLL ref * N */ - return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * (val - 0xb7), 1000); + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000); } static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val) { - return DIV_ROUND_CLOSEST(1000 * val, dev_priv->rps.gpll_ref_freq) + 0xb7; + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7; } static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* * N = val / 2 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2 */ - return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * val, 2 * 2 * 1000); + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000); } static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* CHV needs even values */ - return DIV_ROUND_CLOSEST(2 * 1000 * val, dev_priv->rps.gpll_ref_freq) * 2; + return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2; } int intel_gpu_freq(struct drm_i915_private *dev_priv, int val) @@ -9346,9 +9379,9 @@ void intel_pm_setup(struct drm_i915_private *dev_priv) { mutex_init(&dev_priv->pcu_lock); - INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work, + INIT_DELAYED_WORK(&dev_priv->gt_pm.autoenable_work, __intel_autoenable_gt_powersave); - atomic_set(&dev_priv->rps.num_waiters, 0); + atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0); dev_priv->runtime_pm.suspended = false; atomic_set(&dev_priv->runtime_pm.wakeref_count, 0); From 771decb0b4d75b5cc1d7ddfbdf512bd8768d2793 Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Tue, 10 Oct 2017 22:30:07 +0100 Subject: [PATCH 134/145] drm/i915: Rename intel_enable_rc6 to intel_rc6_enabled This function gives the status of RC6, whether disabled or if enabled then which state. intel_enable_rc6 will be used for enabling RC6 in the next patch. v2: Rebase. v3: Rebase. Signed-off-by: Sagar Arun Kamble Cc: Chris Wilson Cc: Imre Deak Cc: Joonas Lahtinen Reviewed-by: Radoslaw Szwichtenberg #1 Reviewed-by: Ewelina Musial #1 Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1507360055-19948-10-git-send-email-sagar.a.kamble@intel.com Acked-by: Imre Deak Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171010213010.7415-9-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_sysfs.c | 2 +- drivers/gpu/drm/i915/intel_drv.h | 2 +- drivers/gpu/drm/i915/intel_guc.c | 3 ++- drivers/gpu/drm/i915/intel_pm.c | 12 ++++++------ 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 9b8a19149154..f3ac1f45e154 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2502,7 +2502,7 @@ static int intel_runtime_suspend(struct device *kdev) struct drm_i915_private *dev_priv = to_i915(dev); int ret; - if (WARN_ON_ONCE(!(dev_priv->gt_pm.rps.enabled && intel_enable_rc6()))) + if (WARN_ON_ONCE(!(dev_priv->gt_pm.rps.enabled && intel_rc6_enabled()))) return -ENODEV; if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev_priv))) diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 0a57f9867f7f..791759f632e1 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -49,7 +49,7 @@ static u32 calc_residency(struct drm_i915_private *dev_priv, static ssize_t show_rc6_mask(struct device *kdev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%x\n", intel_enable_rc6()); + return snprintf(buf, PAGE_SIZE, "%x\n", intel_rc6_enabled()); } static ssize_t diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 53acfc475e35..cdda0a84babe 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1903,7 +1903,7 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, struct intel_crtc_state *cstate); void intel_init_ipc(struct drm_i915_private *dev_priv); void intel_enable_ipc(struct drm_i915_private *dev_priv); -static inline int intel_enable_rc6(void) +static inline int intel_rc6_enabled(void) { return i915_modparams.enable_rc6; } diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index bbe4c328e9fd..9e18c4fb9909 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -137,7 +137,8 @@ int intel_guc_sample_forcewake(struct intel_guc *guc) action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE; /* WaRsDisableCoarsePowerGating:skl,bxt */ - if (!intel_enable_rc6() || NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) + if (!intel_rc6_enabled() || + NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) action[1] = 0; else /* bit 0 and 1 are for Render and Media domain separately */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 9097489e1993..16f8afbbc5db 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6625,7 +6625,7 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25); /* 3a: Enable RC6 */ - if (intel_enable_rc6() & INTEL_RC6_ENABLE) + if (intel_rc6_enabled() & INTEL_RC6_ENABLE) rc6_mask = GEN6_RC_CTL_RC6_ENABLE; DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE)); I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ @@ -6671,7 +6671,7 @@ static void gen8_enable_rc6(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ /* 3: Enable RC6 */ - if (intel_enable_rc6() & INTEL_RC6_ENABLE) + if (intel_rc6_enabled() & INTEL_RC6_ENABLE) rc6_mask = GEN6_RC_CTL_RC6_ENABLE; intel_print_rc6_info(dev_priv, rc6_mask); @@ -6766,7 +6766,7 @@ static void gen6_enable_rc6(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ /* Check if we are enabling RC6 */ - rc6_mode = intel_enable_rc6(); + rc6_mode = intel_rc6_enabled(); if (rc6_mode & INTEL_RC6_ENABLE) rc6_mask |= GEN6_RC_CTL_RC6_ENABLE; @@ -7268,7 +7268,7 @@ static void cherryview_enable_rc6(struct drm_i915_private *dev_priv) pcbr = I915_READ(VLV_PCBR); /* 3: Enable RC6 */ - if ((intel_enable_rc6() & INTEL_RC6_ENABLE) && + if ((intel_rc6_enabled() & INTEL_RC6_ENABLE) && (pcbr >> VLV_PCBR_ADDR_SHIFT)) rc6_mode = GEN7_RC_CTL_TO_MODE; @@ -7360,7 +7360,7 @@ static void valleyview_enable_rc6(struct drm_i915_private *dev_priv) VLV_MEDIA_RC6_COUNT_EN | VLV_RENDER_RC6_COUNT_EN)); - if (intel_enable_rc6() & INTEL_RC6_ENABLE) + if (intel_rc6_enabled() & INTEL_RC6_ENABLE) rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; intel_print_rc6_info(dev_priv, rc6_mode); @@ -9437,7 +9437,7 @@ u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, { u64 time_hw, units, div; - if (!intel_enable_rc6()) + if (!intel_rc6_enabled()) return 0; intel_runtime_pm_get(dev_priv); From 0870a2a4a3d0aab568ce5729bd99f43f96825f85 Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Tue, 10 Oct 2017 22:30:08 +0100 Subject: [PATCH 135/145] drm/i915: Create generic function to setup LLC ring frequency table Prepared intel_update_ring_freq function to setup ring frequency for applicable platforms determined by macro HAS_LLC. v2: Replaced NEEDS_RING_FREQ_UPDATE with HAS_LLC macro. (Chris) Added check while calling from intel_enable_gt_powersave. v3: s/intel_update_ring_freq/intel_enable_llc_pstate and created new placeholder function intel_disable_llc_pstate. (Chris) Signed-off-by: Sagar Arun Kamble Cc: Imre Deak Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Radoslaw Szwichtenberg Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1507360055-19948-11-git-send-email-sagar.a.kamble@intel.com Acked-by: Imre Deak Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171010213010.7415-10-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_pm.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 16f8afbbc5db..238d405e2fb2 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7982,6 +7982,13 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) gen6_reset_rps_interrupts(dev_priv); } +static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) +{ + lockdep_assert_held(&i915->pcu_lock); + + /* Currently there is no HW configuration to be done to disable. */ +} + void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; @@ -8007,10 +8014,20 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) ironlake_disable_drps(dev_priv); } + if (HAS_LLC(dev_priv)) + intel_disable_llc_pstate(dev_priv); + rps->enabled = false; mutex_unlock(&dev_priv->pcu_lock); } +static inline void intel_enable_llc_pstate(struct drm_i915_private *i915) +{ + lockdep_assert_held(&i915->pcu_lock); + + gen6_update_ring_freq(i915); +} + void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; @@ -8036,21 +8053,20 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) } else if (INTEL_GEN(dev_priv) >= 9) { gen9_enable_rc6(dev_priv); gen9_enable_rps(dev_priv); - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) - gen6_update_ring_freq(dev_priv); } else if (IS_BROADWELL(dev_priv)) { gen8_enable_rc6(dev_priv); gen8_enable_rps(dev_priv); - gen6_update_ring_freq(dev_priv); } else if (INTEL_GEN(dev_priv) >= 6) { gen6_enable_rc6(dev_priv); gen6_enable_rps(dev_priv); - gen6_update_ring_freq(dev_priv); } else if (IS_IRONLAKE_M(dev_priv)) { ironlake_enable_drps(dev_priv); intel_init_emon(dev_priv); } + if (HAS_LLC(dev_priv)) + intel_enable_llc_pstate(dev_priv); + WARN_ON(rps->max_freq < rps->min_freq); WARN_ON(rps->idle_freq > rps->max_freq); From fc77426a8d69d6a706378a53b18c882578af44e5 Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Tue, 10 Oct 2017 22:30:09 +0100 Subject: [PATCH 136/145] drm/i915: Create generic functions to control RC6, RPS Prepared generic functions intel_enable_rc6, intel_disable_rc6, intel_enable_rps and intel_disable_rps functions to setup RC6/RPS based on platforms. v2: Make intel_enable/disable_rc6/rps static. (Chris) v3: Added lockdep_assert_held(dev_priv->pcu_lock) in new generic functions. (Chris) Removed WARN_ON(&dev_priv->pcu_lock) from lower level functions as generic function now has lockdep_assert. Rebase. Signed-off-by: Sagar Arun Kamble Cc: Imre Deak Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Radoslaw Szwichtenberg Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1507360055-19948-12-git-send-email-sagar.a.kamble@intel.com Acked-by: Imre Deak Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171010213010.7415-11-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_pm.c | 132 +++++++++++++++++++------------- 1 file changed, 78 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 238d405e2fb2..a4d431d3980a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6731,8 +6731,6 @@ static void gen6_enable_rc6(struct drm_i915_private *dev_priv) int rc6_mode; int ret; - WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); - I915_WRITE(GEN6_RC_STATE, 0); /* Clear the DBG now so we don't confuse earlier errors */ @@ -6805,8 +6803,6 @@ static void gen6_enable_rc6(struct drm_i915_private *dev_priv) static void gen6_enable_rps(struct drm_i915_private *dev_priv) { - WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); - /* Here begins a magic sequence of register writes to enable * auto-downclocking. * @@ -7227,8 +7223,6 @@ static void cherryview_enable_rc6(struct drm_i915_private *dev_priv) enum intel_engine_id id; u32 gtfifodbg, rc6_mode = 0, pcbr; - WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); - gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV | GT_FIFO_FREE_ENTRIES_CHV); if (gtfifodbg) { @@ -7281,8 +7275,6 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) { u32 val; - WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); /* 1: Program defaults and thresholds for RPS*/ @@ -7327,8 +7319,6 @@ static void valleyview_enable_rc6(struct drm_i915_private *dev_priv) enum intel_engine_id id; u32 gtfifodbg, rc6_mode = 0; - WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); - valleyview_check_pctx(dev_priv); gtfifodbg = I915_READ(GTFIFODBG); @@ -7374,8 +7364,6 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv) { u32 val; - WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); @@ -7989,6 +7977,36 @@ static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) /* Currently there is no HW configuration to be done to disable. */ } +static void intel_disable_rc6(struct drm_i915_private *dev_priv) +{ + lockdep_assert_held(&dev_priv->pcu_lock); + + if (INTEL_GEN(dev_priv) >= 9) + gen9_disable_rc6(dev_priv); + else if (IS_CHERRYVIEW(dev_priv)) + cherryview_disable_rc6(dev_priv); + else if (IS_VALLEYVIEW(dev_priv)) + valleyview_disable_rc6(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_disable_rc6(dev_priv); +} + +static void intel_disable_rps(struct drm_i915_private *dev_priv) +{ + lockdep_assert_held(&dev_priv->pcu_lock); + + if (INTEL_GEN(dev_priv) >= 9) + gen9_disable_rps(dev_priv); + else if (IS_CHERRYVIEW(dev_priv)) + cherryview_disable_rps(dev_priv); + else if (IS_VALLEYVIEW(dev_priv)) + valleyview_disable_rps(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_disable_rps(dev_priv); + else if (IS_IRONLAKE_M(dev_priv)) + ironlake_disable_drps(dev_priv); +} + void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; @@ -7998,22 +8016,8 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->pcu_lock); - if (INTEL_GEN(dev_priv) >= 9) { - gen9_disable_rc6(dev_priv); - gen9_disable_rps(dev_priv); - } else if (IS_CHERRYVIEW(dev_priv)) { - cherryview_disable_rc6(dev_priv); - cherryview_disable_rps(dev_priv); - } else if (IS_VALLEYVIEW(dev_priv)) { - valleyview_disable_rc6(dev_priv); - valleyview_disable_rps(dev_priv); - } else if (INTEL_GEN(dev_priv) >= 6) { - gen6_disable_rc6(dev_priv); - gen6_disable_rps(dev_priv); - } else if (IS_IRONLAKE_M(dev_priv)) { - ironlake_disable_drps(dev_priv); - } - + intel_disable_rc6(dev_priv); + intel_disable_rps(dev_priv); if (HAS_LLC(dev_priv)) intel_disable_llc_pstate(dev_priv); @@ -8028,6 +8032,50 @@ static inline void intel_enable_llc_pstate(struct drm_i915_private *i915) gen6_update_ring_freq(i915); } +static void intel_enable_rc6(struct drm_i915_private *dev_priv) +{ + lockdep_assert_held(&dev_priv->pcu_lock); + + if (IS_CHERRYVIEW(dev_priv)) + cherryview_enable_rc6(dev_priv); + else if (IS_VALLEYVIEW(dev_priv)) + valleyview_enable_rc6(dev_priv); + else if (INTEL_GEN(dev_priv) >= 9) + gen9_enable_rc6(dev_priv); + else if (IS_BROADWELL(dev_priv)) + gen8_enable_rc6(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_enable_rc6(dev_priv); +} + +static void intel_enable_rps(struct drm_i915_private *dev_priv) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + lockdep_assert_held(&dev_priv->pcu_lock); + + if (IS_CHERRYVIEW(dev_priv)) { + cherryview_enable_rps(dev_priv); + } else if (IS_VALLEYVIEW(dev_priv)) { + valleyview_enable_rps(dev_priv); + } else if (INTEL_GEN(dev_priv) >= 9) { + gen9_enable_rps(dev_priv); + } else if (IS_BROADWELL(dev_priv)) { + gen8_enable_rps(dev_priv); + } else if (INTEL_GEN(dev_priv) >= 6) { + gen6_enable_rps(dev_priv); + } else if (IS_IRONLAKE_M(dev_priv)) { + ironlake_enable_drps(dev_priv); + intel_init_emon(dev_priv); + } + + WARN_ON(rps->max_freq < rps->min_freq); + WARN_ON(rps->idle_freq > rps->max_freq); + + WARN_ON(rps->efficient_freq < rps->min_freq); + WARN_ON(rps->efficient_freq > rps->max_freq); +} + void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; @@ -8044,35 +8092,11 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->pcu_lock); - if (IS_CHERRYVIEW(dev_priv)) { - cherryview_enable_rc6(dev_priv); - cherryview_enable_rps(dev_priv); - } else if (IS_VALLEYVIEW(dev_priv)) { - valleyview_enable_rc6(dev_priv); - valleyview_enable_rps(dev_priv); - } else if (INTEL_GEN(dev_priv) >= 9) { - gen9_enable_rc6(dev_priv); - gen9_enable_rps(dev_priv); - } else if (IS_BROADWELL(dev_priv)) { - gen8_enable_rc6(dev_priv); - gen8_enable_rps(dev_priv); - } else if (INTEL_GEN(dev_priv) >= 6) { - gen6_enable_rc6(dev_priv); - gen6_enable_rps(dev_priv); - } else if (IS_IRONLAKE_M(dev_priv)) { - ironlake_enable_drps(dev_priv); - intel_init_emon(dev_priv); - } - + intel_enable_rc6(dev_priv); + intel_enable_rps(dev_priv); if (HAS_LLC(dev_priv)) intel_enable_llc_pstate(dev_priv); - WARN_ON(rps->max_freq < rps->min_freq); - WARN_ON(rps->idle_freq > rps->max_freq); - - WARN_ON(rps->efficient_freq < rps->min_freq); - WARN_ON(rps->efficient_freq > rps->max_freq); - rps->enabled = true; mutex_unlock(&dev_priv->pcu_lock); } From 37d933fc1728e998d929d0ff5113dcb14ce31293 Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Tue, 10 Oct 2017 22:30:10 +0100 Subject: [PATCH 137/145] drm/i915: Introduce separate status variable for RC6 and LLC ring frequency setup Defined new struct intel_rc6 to hold RC6 specific state and intel_ring_pstate to hold ring specific state. v2: s/intel_ring_pstate/intel_llc_pstate. Removed checks from autoenable_* functions. (Chris) Signed-off-by: Sagar Arun Kamble Cc: Imre Deak Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Radoslaw Szwichtenberg Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1507360055-19948-13-git-send-email-sagar.a.kamble@intel.com Acked-by: Imre Deak Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171010213010.7415-12-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 10 ++++++ drivers/gpu/drm/i915/intel_pm.c | 54 +++++++++++++++++++-------------- 3 files changed, 43 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f3ac1f45e154..f1e651703764 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2502,7 +2502,7 @@ static int intel_runtime_suspend(struct device *kdev) struct drm_i915_private *dev_priv = to_i915(dev); int ret; - if (WARN_ON_ONCE(!(dev_priv->gt_pm.rps.enabled && intel_rc6_enabled()))) + if (WARN_ON_ONCE(!(dev_priv->gt_pm.rc6.enabled && intel_rc6_enabled()))) return -ENODEV; if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev_priv))) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 521348ee7242..6bbc4b83aa0a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1365,8 +1365,18 @@ struct intel_rps { struct intel_rps_ei ei; }; +struct intel_rc6 { + bool enabled; +}; + +struct intel_llc_pstate { + bool enabled; +}; + struct intel_gen6_power_mgmt { struct intel_rps rps; + struct intel_rc6 rc6; + struct intel_llc_pstate llc_pstate; struct delayed_work autoenable_work; }; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a4d431d3980a..2fcff9788b6f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7964,7 +7964,8 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv) void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) { - dev_priv->gt_pm.rps.enabled = true; /* force disabling */ + dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */ + dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */ intel_disable_gt_powersave(dev_priv); gen6_reset_rps_interrupts(dev_priv); @@ -7974,13 +7975,21 @@ static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) { lockdep_assert_held(&i915->pcu_lock); + if (!i915->gt_pm.llc_pstate.enabled) + return; + /* Currently there is no HW configuration to be done to disable. */ + + i915->gt_pm.llc_pstate.enabled = false; } static void intel_disable_rc6(struct drm_i915_private *dev_priv) { lockdep_assert_held(&dev_priv->pcu_lock); + if (!dev_priv->gt_pm.rc6.enabled) + return; + if (INTEL_GEN(dev_priv) >= 9) gen9_disable_rc6(dev_priv); else if (IS_CHERRYVIEW(dev_priv)) @@ -7989,12 +7998,17 @@ static void intel_disable_rc6(struct drm_i915_private *dev_priv) valleyview_disable_rc6(dev_priv); else if (INTEL_GEN(dev_priv) >= 6) gen6_disable_rc6(dev_priv); + + dev_priv->gt_pm.rc6.enabled = false; } static void intel_disable_rps(struct drm_i915_private *dev_priv) { lockdep_assert_held(&dev_priv->pcu_lock); + if (!dev_priv->gt_pm.rps.enabled) + return; + if (INTEL_GEN(dev_priv) >= 9) gen9_disable_rps(dev_priv); else if (IS_CHERRYVIEW(dev_priv)) @@ -8005,15 +8019,12 @@ static void intel_disable_rps(struct drm_i915_private *dev_priv) gen6_disable_rps(dev_priv); else if (IS_IRONLAKE_M(dev_priv)) ironlake_disable_drps(dev_priv); + + dev_priv->gt_pm.rps.enabled = false; } void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) { - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - if (!READ_ONCE(rps->enabled)) - return; - mutex_lock(&dev_priv->pcu_lock); intel_disable_rc6(dev_priv); @@ -8021,7 +8032,6 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) if (HAS_LLC(dev_priv)) intel_disable_llc_pstate(dev_priv); - rps->enabled = false; mutex_unlock(&dev_priv->pcu_lock); } @@ -8029,13 +8039,21 @@ static inline void intel_enable_llc_pstate(struct drm_i915_private *i915) { lockdep_assert_held(&i915->pcu_lock); + if (i915->gt_pm.llc_pstate.enabled) + return; + gen6_update_ring_freq(i915); + + i915->gt_pm.llc_pstate.enabled = true; } static void intel_enable_rc6(struct drm_i915_private *dev_priv) { lockdep_assert_held(&dev_priv->pcu_lock); + if (dev_priv->gt_pm.rc6.enabled) + return; + if (IS_CHERRYVIEW(dev_priv)) cherryview_enable_rc6(dev_priv); else if (IS_VALLEYVIEW(dev_priv)) @@ -8046,6 +8064,8 @@ static void intel_enable_rc6(struct drm_i915_private *dev_priv) gen8_enable_rc6(dev_priv); else if (INTEL_GEN(dev_priv) >= 6) gen6_enable_rc6(dev_priv); + + dev_priv->gt_pm.rc6.enabled = true; } static void intel_enable_rps(struct drm_i915_private *dev_priv) @@ -8054,6 +8074,9 @@ static void intel_enable_rps(struct drm_i915_private *dev_priv) lockdep_assert_held(&dev_priv->pcu_lock); + if (rps->enabled) + return; + if (IS_CHERRYVIEW(dev_priv)) { cherryview_enable_rps(dev_priv); } else if (IS_VALLEYVIEW(dev_priv)) { @@ -8074,18 +8097,12 @@ static void intel_enable_rps(struct drm_i915_private *dev_priv) WARN_ON(rps->efficient_freq < rps->min_freq); WARN_ON(rps->efficient_freq > rps->max_freq); + + rps->enabled = true; } void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) { - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* We shouldn't be disabling as we submit, so this should be less - * racy than it appears! - */ - if (READ_ONCE(rps->enabled)) - return; - /* Powersaving is controlled by the host when inside a VM */ if (intel_vgpu_active(dev_priv)) return; @@ -8097,7 +8114,6 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) if (HAS_LLC(dev_priv)) intel_enable_llc_pstate(dev_priv); - rps->enabled = true; mutex_unlock(&dev_priv->pcu_lock); } @@ -8110,9 +8126,6 @@ static void __intel_autoenable_gt_powersave(struct work_struct *work) struct intel_engine_cs *rcs; struct drm_i915_gem_request *req; - if (READ_ONCE(dev_priv->gt_pm.rps.enabled)) - goto out; - rcs = dev_priv->engine[RCS]; if (rcs->last_retired_context) goto out; @@ -8140,9 +8153,6 @@ out: void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv) { - if (READ_ONCE(dev_priv->gt_pm.rps.enabled)) - return; - if (IS_IRONLAKE_M(dev_priv)) { ironlake_enable_drps(dev_priv); intel_init_emon(dev_priv); From af7a8ffad9c58deac791333a65c62d7fc72f9e9c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 11 Oct 2017 11:10:19 +0200 Subject: [PATCH 138/145] drm/i915: Use rcu instead of stop_machine in set_wedged stop_machine is not really a locking primitive we should use, except when the hw folks tell us the hw is broken and that's the only way to work around it. This patch tries to address the locking abuse of stop_machine() from commit 20e4933c478a1ca694b38fa4ac44d99e659941f5 Author: Chris Wilson Date: Tue Nov 22 14:41:21 2016 +0000 drm/i915: Stop the machine as we install the wedged submit_request handler Chris said parts of the reasons for going with stop_machine() was that it's no overhead for the fast-path. But these callbacks use irqsave spinlocks and do a bunch of MMIO, and rcu_read_lock is _real_ fast. To stay as close as possible to the stop_machine semantics we first update all the submit function pointers to the nop handler, then call synchronize_rcu() to make sure no new requests can be submitted. This should give us exactly the huge barrier we want. I pondered whether we should annotate engine->submit_request as __rcu and use rcu_assign_pointer and rcu_dereference on it. But the reason behind those is to make sure the compiler/cpu barriers are there for when you have an actual data structure you point at, to make sure all the writes are seen correctly on the read side. But we just have a function pointer, and .text isn't changed, so no need for these barriers and hence no need for annotations. Unfortunately there's a complication with the call to intel_engine_init_global_seqno: - Without stop_machine we must hold the corresponding spinlock. - Without stop_machine we must ensure that all requests are marked as having failed with dma_fence_set_error() before we call it. That means we need to split the nop request submission into two phases, both synchronized with rcu: 1. Only stop submitting the requests to hw and mark them as failed. 2. After all pending requests in the scheduler/ring are suitably marked up as failed and we can force complete them all, also force complete by calling intel_engine_init_global_seqno(). This should fix the followwing lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 4.14.0-rc3-CI-CI_DRM_3179+ #1 Tainted: G U ------------------------------------------------------ kworker/3:4/562 is trying to acquire lock: (cpu_hotplug_lock.rw_sem){++++}, at: [] stop_machine+0x1c/0x40 but task is already holding lock: (&dev->struct_mutex){+.+.}, at: [] i915_reset_device+0x1e8/0x260 [i915] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #6 (&dev->struct_mutex){+.+.}: __lock_acquire+0x1420/0x15e0 lock_acquire+0xb0/0x200 __mutex_lock+0x86/0x9b0 mutex_lock_interruptible_nested+0x1b/0x20 i915_mutex_lock_interruptible+0x51/0x130 [i915] i915_gem_fault+0x209/0x650 [i915] __do_fault+0x1e/0x80 __handle_mm_fault+0xa08/0xed0 handle_mm_fault+0x156/0x300 __do_page_fault+0x2c5/0x570 do_page_fault+0x28/0x250 page_fault+0x22/0x30 -> #5 (&mm->mmap_sem){++++}: __lock_acquire+0x1420/0x15e0 lock_acquire+0xb0/0x200 __might_fault+0x68/0x90 _copy_to_user+0x23/0x70 filldir+0xa5/0x120 dcache_readdir+0xf9/0x170 iterate_dir+0x69/0x1a0 SyS_getdents+0xa5/0x140 entry_SYSCALL_64_fastpath+0x1c/0xb1 -> #4 (&sb->s_type->i_mutex_key#5){++++}: down_write+0x3b/0x70 handle_create+0xcb/0x1e0 devtmpfsd+0x139/0x180 kthread+0x152/0x190 ret_from_fork+0x27/0x40 -> #3 ((complete)&req.done){+.+.}: __lock_acquire+0x1420/0x15e0 lock_acquire+0xb0/0x200 wait_for_common+0x58/0x210 wait_for_completion+0x1d/0x20 devtmpfs_create_node+0x13d/0x160 device_add+0x5eb/0x620 device_create_groups_vargs+0xe0/0xf0 device_create+0x3a/0x40 msr_device_create+0x2b/0x40 cpuhp_invoke_callback+0xc9/0xbf0 cpuhp_thread_fun+0x17b/0x240 smpboot_thread_fn+0x18a/0x280 kthread+0x152/0x190 ret_from_fork+0x27/0x40 -> #2 (cpuhp_state-up){+.+.}: __lock_acquire+0x1420/0x15e0 lock_acquire+0xb0/0x200 cpuhp_issue_call+0x133/0x1c0 __cpuhp_setup_state_cpuslocked+0x139/0x2a0 __cpuhp_setup_state+0x46/0x60 page_writeback_init+0x43/0x67 pagecache_init+0x3d/0x42 start_kernel+0x3a8/0x3fc x86_64_start_reservations+0x2a/0x2c x86_64_start_kernel+0x6d/0x70 verify_cpu+0x0/0xfb -> #1 (cpuhp_state_mutex){+.+.}: __lock_acquire+0x1420/0x15e0 lock_acquire+0xb0/0x200 __mutex_lock+0x86/0x9b0 mutex_lock_nested+0x1b/0x20 __cpuhp_setup_state_cpuslocked+0x53/0x2a0 __cpuhp_setup_state+0x46/0x60 page_alloc_init+0x28/0x30 start_kernel+0x145/0x3fc x86_64_start_reservations+0x2a/0x2c x86_64_start_kernel+0x6d/0x70 verify_cpu+0x0/0xfb -> #0 (cpu_hotplug_lock.rw_sem){++++}: check_prev_add+0x430/0x840 __lock_acquire+0x1420/0x15e0 lock_acquire+0xb0/0x200 cpus_read_lock+0x3d/0xb0 stop_machine+0x1c/0x40 i915_gem_set_wedged+0x1a/0x20 [i915] i915_reset+0xb9/0x230 [i915] i915_reset_device+0x1f6/0x260 [i915] i915_handle_error+0x2d8/0x430 [i915] hangcheck_declare_hang+0xd3/0xf0 [i915] i915_hangcheck_elapsed+0x262/0x2d0 [i915] process_one_work+0x233/0x660 worker_thread+0x4e/0x3b0 kthread+0x152/0x190 ret_from_fork+0x27/0x40 other info that might help us debug this: Chain exists of: cpu_hotplug_lock.rw_sem --> &mm->mmap_sem --> &dev->struct_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&dev->struct_mutex); lock(&mm->mmap_sem); lock(&dev->struct_mutex); lock(cpu_hotplug_lock.rw_sem); *** DEADLOCK *** 3 locks held by kworker/3:4/562: #0: ("events_long"){+.+.}, at: [] process_one_work+0x1aa/0x660 #1: ((&(&i915->gpu_error.hangcheck_work)->work)){+.+.}, at: [] process_one_work+0x1aa/0x660 #2: (&dev->struct_mutex){+.+.}, at: [] i915_reset_device+0x1e8/0x260 [i915] stack backtrace: CPU: 3 PID: 562 Comm: kworker/3:4 Tainted: G U 4.14.0-rc3-CI-CI_DRM_3179+ #1 Hardware name: /NUC7i5BNB, BIOS BNKBL357.86A.0048.2017.0704.1415 07/04/2017 Workqueue: events_long i915_hangcheck_elapsed [i915] Call Trace: dump_stack+0x68/0x9f print_circular_bug+0x235/0x3c0 ? lockdep_init_map_crosslock+0x20/0x20 check_prev_add+0x430/0x840 ? irq_work_queue+0x86/0xe0 ? wake_up_klogd+0x53/0x70 __lock_acquire+0x1420/0x15e0 ? __lock_acquire+0x1420/0x15e0 ? lockdep_init_map_crosslock+0x20/0x20 lock_acquire+0xb0/0x200 ? stop_machine+0x1c/0x40 ? i915_gem_object_truncate+0x50/0x50 [i915] cpus_read_lock+0x3d/0xb0 ? stop_machine+0x1c/0x40 stop_machine+0x1c/0x40 i915_gem_set_wedged+0x1a/0x20 [i915] i915_reset+0xb9/0x230 [i915] i915_reset_device+0x1f6/0x260 [i915] ? gen8_gt_irq_ack+0x170/0x170 [i915] ? work_on_cpu_safe+0x60/0x60 i915_handle_error+0x2d8/0x430 [i915] ? vsnprintf+0xd1/0x4b0 ? scnprintf+0x3a/0x70 hangcheck_declare_hang+0xd3/0xf0 [i915] ? intel_runtime_pm_put+0x56/0xa0 [i915] i915_hangcheck_elapsed+0x262/0x2d0 [i915] process_one_work+0x233/0x660 worker_thread+0x4e/0x3b0 kthread+0x152/0x190 ? process_one_work+0x660/0x660 ? kthread_create_on_node+0x40/0x40 ret_from_fork+0x27/0x40 Setting dangerous option reset - tainting kernel i915 0000:00:02.0: Resetting chip after gpu hang Setting dangerous option reset - tainting kernel i915 0000:00:02.0: Resetting chip after gpu hang v2: Have 1 global synchronize_rcu() barrier across all engines, and improve commit message. v3: We need to protect the seqno update with the timeline spinlock (in set_wedged) to avoid racing with other updates of the seqno, like we already do in nop_submit_request (Chris). v4: Use two-phase sequence to plug the race Chris spotted where we can complete requests before they're marked up with -EIO. v5: Review from Chris: - simplify nop_submit_request. - Add comment to rcu_read_lock section. - Align comments with the new style. v6: Remove unused variable to appease CI. Reviewed-by: Chris Wilson Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102886 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103096 Cc: Chris Wilson Cc: Mika Kuoppala Cc: Thomas Gleixner Cc: Marta Lofstedt Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20171011091019.1425-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/i915_gem.c | 84 ++++++++++++------- drivers/gpu/drm/i915/i915_gem_request.c | 9 ++ .../gpu/drm/i915/selftests/i915_gem_request.c | 2 + 3 files changed, 64 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f76890b74d00..20fcac37c85a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3086,6 +3086,14 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) } static void nop_submit_request(struct drm_i915_gem_request *request) +{ + GEM_BUG_ON(!i915_terminally_wedged(&request->i915->gpu_error)); + dma_fence_set_error(&request->fence, -EIO); + + i915_gem_request_submit(request); +} + +static void nop_complete_submit_request(struct drm_i915_gem_request *request) { unsigned long flags; @@ -3098,45 +3106,59 @@ static void nop_submit_request(struct drm_i915_gem_request *request) spin_unlock_irqrestore(&request->engine->timeline->lock, flags); } -static void engine_set_wedged(struct intel_engine_cs *engine) +void i915_gem_set_wedged(struct drm_i915_private *i915) { - /* We need to be sure that no thread is running the old callback as - * we install the nop handler (otherwise we would submit a request - * to hardware that will never complete). In order to prevent this - * race, we wait until the machine is idle before making the swap - * (using stop_machine()). - */ - engine->submit_request = nop_submit_request; - - /* Mark all executing requests as skipped */ - engine->cancel_requests(engine); - - /* Mark all pending requests as complete so that any concurrent - * (lockless) lookup doesn't try and wait upon the request as we - * reset it. - */ - intel_engine_init_global_seqno(engine, - intel_engine_last_submit(engine)); -} - -static int __i915_gem_set_wedged_BKL(void *data) -{ - struct drm_i915_private *i915 = data; struct intel_engine_cs *engine; enum intel_engine_id id; + /* + * First, stop submission to hw, but do not yet complete requests by + * rolling the global seqno forward (since this would complete requests + * for which we haven't set the fence error to EIO yet). + */ for_each_engine(engine, i915, id) - engine_set_wedged(engine); + engine->submit_request = nop_submit_request; + + /* + * Make sure no one is running the old callback before we proceed with + * cancelling requests and resetting the completion tracking. Otherwise + * we might submit a request to the hardware which never completes. + */ + synchronize_rcu(); + + for_each_engine(engine, i915, id) { + /* Mark all executing requests as skipped */ + engine->cancel_requests(engine); + + /* + * Only once we've force-cancelled all in-flight requests can we + * start to complete all requests. + */ + engine->submit_request = nop_complete_submit_request; + } + + /* + * Make sure no request can slip through without getting completed by + * either this call here to intel_engine_init_global_seqno, or the one + * in nop_complete_submit_request. + */ + synchronize_rcu(); + + for_each_engine(engine, i915, id) { + unsigned long flags; + + /* Mark all pending requests as complete so that any concurrent + * (lockless) lookup doesn't try and wait upon the request as we + * reset it. + */ + spin_lock_irqsave(&engine->timeline->lock, flags); + intel_engine_init_global_seqno(engine, + intel_engine_last_submit(engine)); + spin_unlock_irqrestore(&engine->timeline->lock, flags); + } set_bit(I915_WEDGED, &i915->gpu_error.flags); wake_up_all(&i915->gpu_error.reset_queue); - - return 0; -} - -void i915_gem_set_wedged(struct drm_i915_private *dev_priv) -{ - stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL); } bool i915_gem_unset_wedged(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index d5f4023e5d63..d140fcf5c6a3 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -556,7 +556,16 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) switch (state) { case FENCE_COMPLETE: trace_i915_gem_request_submit(request); + /* + * We need to serialize use of the submit_request() callback with its + * hotplugging performed during an emergency i915_gem_set_wedged(). + * We use the RCU mechanism to mark the critical section in order to + * force i915_gem_set_wedged() to wait until the submit_request() is + * completed before proceeding. + */ + rcu_read_lock(); request->engine->submit_request(request); + rcu_read_unlock(); break; case FENCE_FREE: diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 78b9f811707f..a999161e8db1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -215,7 +215,9 @@ static int igt_request_rewind(void *arg) } i915_gem_request_get(vip); i915_add_request(vip); + rcu_read_lock(); request->engine->submit_request(request); + rcu_read_unlock(); mutex_unlock(&i915->drm.struct_mutex); From de330815677d80a7eae7c82e134de1507867291b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 9 Oct 2017 19:19:50 +0300 Subject: [PATCH 139/145] drm/i915: Reuse normal state readout for LVDS/DVO fixed mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reuse the normal state readout code to get the fixed mode for LVDS/DVO encoders. This removes some partially duplicated state readout code from LVDS/DVO encoders. The duplicated code wasn't actually even populating the negative h/vsync flags, leading to possible state checker complaints. The normal readout code populates that stuff fully. Cc: Chris Wilson Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171009161951.22420-1-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson Tested-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 50 +++++++++++++--------------- drivers/gpu/drm/i915/intel_drv.h | 5 +-- drivers/gpu/drm/i915/intel_dvo.c | 33 +++++------------- drivers/gpu/drm/i915/intel_lvds.c | 18 +++------- 4 files changed, 39 insertions(+), 67 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b2c5fba102e1..932d846e2456 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10238,48 +10238,44 @@ static void ironlake_pch_clock_get(struct intel_crtc *crtc, &pipe_config->fdi_m_n); } -/** Returns the currently programmed mode of the given pipe. */ -struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, - struct drm_crtc *crtc) +/* Returns the currently programmed mode of the given encoder. */ +struct drm_display_mode * +intel_encoder_current_mode(struct intel_encoder *encoder) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc_state *crtc_state; struct drm_display_mode *mode; - struct intel_crtc_state *pipe_config; - enum pipe pipe = intel_crtc->pipe; + struct intel_crtc *crtc; + enum pipe pipe; + + if (!encoder->get_hw_state(encoder, &pipe)) + return NULL; + + crtc = intel_get_crtc_for_pipe(dev_priv, pipe); mode = kzalloc(sizeof(*mode), GFP_KERNEL); if (!mode) return NULL; - pipe_config = kzalloc(sizeof(*pipe_config), GFP_KERNEL); - if (!pipe_config) { + crtc_state = kzalloc(sizeof(*crtc_state), GFP_KERNEL); + if (!crtc_state) { kfree(mode); return NULL; } - /* - * Construct a pipe_config sufficient for getting the clock info - * back out of crtc_clock_get. - * - * Note, if LVDS ever uses a non-1 pixel multiplier, we'll need - * to use a real value here instead. - */ - pipe_config->cpu_transcoder = (enum transcoder) pipe; - pipe_config->pixel_multiplier = 1; - pipe_config->dpll_hw_state.dpll = I915_READ(DPLL(pipe)); - pipe_config->dpll_hw_state.fp0 = I915_READ(FP0(pipe)); - pipe_config->dpll_hw_state.fp1 = I915_READ(FP1(pipe)); - i9xx_crtc_clock_get(intel_crtc, pipe_config); + crtc_state->base.crtc = &crtc->base; - pipe_config->base.adjusted_mode.crtc_clock = - pipe_config->port_clock / pipe_config->pixel_multiplier; + if (!dev_priv->display.get_pipe_config(crtc, crtc_state)) { + kfree(crtc_state); + kfree(mode); + return NULL; + } - intel_get_pipe_timings(intel_crtc, pipe_config); + encoder->get_config(encoder, crtc_state); - intel_mode_from_pipe_config(mode, pipe_config); + intel_mode_from_pipe_config(mode, crtc_state); - kfree(pipe_config); + kfree(crtc_state); return mode; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index cdda0a84babe..b87946dcc53f 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1363,8 +1363,9 @@ struct intel_connector *intel_connector_alloc(void); bool intel_connector_get_hw_state(struct intel_connector *connector); void intel_connector_attach_encoder(struct intel_connector *connector, struct intel_encoder *encoder); -struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, - struct drm_crtc *crtc); +struct drm_display_mode * +intel_encoder_current_mode(struct intel_encoder *encoder); + enum pipe intel_get_pipe_from_connector(struct intel_connector *connector); int intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data, struct drm_file *file_priv); diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index 5c562e1f56ae..53c9b763f4ce 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -379,32 +379,15 @@ static const struct drm_encoder_funcs intel_dvo_enc_funcs = { * chip being on DVOB/C and having multiple pipes. */ static struct drm_display_mode * -intel_dvo_get_current_mode(struct drm_connector *connector) +intel_dvo_get_current_mode(struct intel_encoder *encoder) { - struct drm_device *dev = connector->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_dvo *intel_dvo = intel_attached_dvo(connector); - uint32_t dvo_val = I915_READ(intel_dvo->dev.dvo_reg); - struct drm_display_mode *mode = NULL; + struct drm_display_mode *mode; - /* If the DVO port is active, that'll be the LVDS, so we can pull out - * its timings to get how the BIOS set up the panel. - */ - if (dvo_val & DVO_ENABLE) { - struct intel_crtc *crtc; - int pipe = (dvo_val & DVO_PIPE_B_SELECT) ? 1 : 0; - - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - if (crtc) { - mode = intel_crtc_mode_get(dev, &crtc->base); - if (mode) { - mode->type |= DRM_MODE_TYPE_PREFERRED; - if (dvo_val & DVO_HSYNC_ACTIVE_HIGH) - mode->flags |= DRM_MODE_FLAG_PHSYNC; - if (dvo_val & DVO_VSYNC_ACTIVE_HIGH) - mode->flags |= DRM_MODE_FLAG_PVSYNC; - } - } + mode = intel_encoder_current_mode(encoder); + if (mode) { + DRM_DEBUG_KMS("using current (BIOS) mode: "); + drm_mode_debug_printmodeline(mode); + mode->type |= DRM_MODE_TYPE_PREFERRED; } return mode; @@ -551,7 +534,7 @@ void intel_dvo_init(struct drm_i915_private *dev_priv) * mode being output through DVO. */ intel_panel_init(&intel_connector->panel, - intel_dvo_get_current_mode(connector), + intel_dvo_get_current_mode(intel_encoder), NULL, NULL); intel_dvo->panel_wants_dither = true; } diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index a55954a89148..c5072b951b9c 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -939,10 +939,8 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) struct drm_display_mode *fixed_mode = NULL; struct drm_display_mode *downclock_mode = NULL; struct edid *edid; - struct intel_crtc *crtc; i915_reg_t lvds_reg; u32 lvds; - int pipe; u8 pin; u32 allowed_scalers; @@ -1118,17 +1116,11 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) if (HAS_PCH_SPLIT(dev_priv)) goto failed; - pipe = (lvds & LVDS_PIPEB_SELECT) ? 1 : 0; - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - - if (crtc && (lvds & LVDS_PORT_EN)) { - fixed_mode = intel_crtc_mode_get(dev, &crtc->base); - if (fixed_mode) { - DRM_DEBUG_KMS("using current (BIOS) mode: "); - drm_mode_debug_printmodeline(fixed_mode); - fixed_mode->type |= DRM_MODE_TYPE_PREFERRED; - goto out; - } + fixed_mode = intel_encoder_current_mode(intel_encoder); + if (fixed_mode) { + DRM_DEBUG_KMS("using current (BIOS) mode: "); + drm_mode_debug_printmodeline(fixed_mode); + fixed_mode->type |= DRM_MODE_TYPE_PREFERRED; } /* If we still don't have a mode after all that, give up. */ From 3c7b6b3c4f2a4882d9d82365cc122cc8d29f4811 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 9 Oct 2017 19:19:51 +0300 Subject: [PATCH 140/145] drm/i915: Allow PCH platforms fall back to BIOS LVDS mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With intel_encoder_current_mode() using the normal state readout code it actually works on PCH platforms as well. So let's nuke the PCH check from intel_lvds_init(). I suppose there aren't any machines that actually need this, but at least we get to eliminate a few lines of code, and one FIXME. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171009161951.22420-2-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_lvds.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index c5072b951b9c..38572d65e46e 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -1111,11 +1111,6 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) * on. If so, assume that whatever is currently programmed is the * correct mode. */ - - /* Ironlake: FIXME if still fail, not try pipe mode now */ - if (HAS_PCH_SPLIT(dev_priv)) - goto failed; - fixed_mode = intel_encoder_current_mode(intel_encoder); if (fixed_mode) { DRM_DEBUG_KMS("using current (BIOS) mode: "); From fc603ca7f81d729bbec7ced294f25a93113c49f7 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 9 Oct 2017 12:29:58 +0300 Subject: [PATCH 141/145] drm/i915/dp: centralize max source rate conditions more Turn intel_dp_source_supports_hbr2() into a simple helper to query the pre-filled source rates array, and move the conditions about which platforms support which rates to the single point of truth in intel_dp_set_source_rates(). This also reduces the code paths you have to think about in the source rates initialization in intel_dp_set_source_rates(), making it easier to grasp. Cc: Manasi Navare Reviewed-by: Manasi Navare Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20171009092959.29021-2-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index ca48bce23a6f..09d75df497c0 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -254,15 +254,15 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) } else if (IS_GEN9_BC(dev_priv)) { source_rates = skl_rates; size = ARRAY_SIZE(skl_rates); - } else { + } else if ((IS_HASWELL(dev_priv) && !IS_HSW_ULX(dev_priv)) || + IS_BROADWELL(dev_priv)) { source_rates = default_rates; size = ARRAY_SIZE(default_rates); + } else { + source_rates = default_rates; + size = ARRAY_SIZE(default_rates) - 1; } - /* This depends on the fact that 5.4 is last value in the array */ - if (!intel_dp_source_supports_hbr2(intel_dp)) - size--; - intel_dp->source_rates = source_rates; intel_dp->num_source_rates = size; } @@ -1482,14 +1482,9 @@ intel_dp_aux_init(struct intel_dp *intel_dp) bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp) { - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); + int max_rate = intel_dp->source_rates[intel_dp->num_source_rates - 1]; - if ((IS_HASWELL(dev_priv) && !IS_HSW_ULX(dev_priv)) || - IS_BROADWELL(dev_priv) || (INTEL_GEN(dev_priv) >= 9)) - return true; - else - return false; + return max_rate >= 540000; } static void From a8a08886ef8c6a5a740e50e3897466cfd11724ff Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 9 Oct 2017 12:29:59 +0300 Subject: [PATCH 142/145] drm/i915/dp: limit sink rates based on rate Get rid of redundant intel_dp_num_rates(). We can simply look at the rate and limit based on that. Cc: Manasi Navare Reviewed-by: Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20171009092959.29021-3-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 09d75df497c0..b0f446b68f42 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -137,32 +137,20 @@ static void vlv_steal_power_sequencer(struct drm_device *dev, enum pipe pipe); static void intel_dp_unset_edid(struct intel_dp *intel_dp); -static int intel_dp_num_rates(u8 link_bw_code) -{ - switch (link_bw_code) { - default: - WARN(1, "invalid max DP link bw val %x, using 1.62Gbps\n", - link_bw_code); - case DP_LINK_BW_1_62: - return 1; - case DP_LINK_BW_2_7: - return 2; - case DP_LINK_BW_5_4: - return 3; - } -} - /* update sink rates from dpcd */ static void intel_dp_set_sink_rates(struct intel_dp *intel_dp) { - int i, num_rates; + int i, max_rate; - num_rates = intel_dp_num_rates(intel_dp->dpcd[DP_MAX_LINK_RATE]); + max_rate = drm_dp_bw_code_to_link_rate(intel_dp->dpcd[DP_MAX_LINK_RATE]); - for (i = 0; i < num_rates; i++) + for (i = 0; i < ARRAY_SIZE(default_rates); i++) { + if (default_rates[i] > max_rate) + break; intel_dp->sink_rates[i] = default_rates[i]; + } - intel_dp->num_sink_rates = num_rates; + intel_dp->num_sink_rates = i; } /* Theoretical max between source and sink */ From 7c781423379d8215f2cc4940700bde0b9f93fed0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Oct 2017 15:18:57 +0100 Subject: [PATCH 143/145] drm/i915/userptr: Drop struct_mutex before cleanup Purely to silence lockdep, as we know that no bo can exist at this time and so the inversion is impossible. Nevertheless, lockdep currently warns on unload: [ 137.522565] WARNING: possible circular locking dependency detected [ 137.522568] 4.14.0-rc4-CI-CI_DRM_3209+ #1 Tainted: G U [ 137.522570] ------------------------------------------------------ [ 137.522572] drv_module_relo/1532 is trying to acquire lock: [ 137.522574] ("i915-userptr-acquire"){+.+.}, at: [] flush_workqueue+0x91/0x540 [ 137.522581] but task is already holding lock: [ 137.522583] (&dev->struct_mutex){+.+.}, at: [] i915_gem_fini+0x3f/0xc0 [i915] [ 137.522605] which lock already depends on the new lock. [ 137.522608] the existing dependency chain (in reverse order) is: [ 137.522611] -> #3 (&dev->struct_mutex){+.+.}: [ 137.522615] __lock_acquire+0x1420/0x15e0 [ 137.522618] lock_acquire+0xb0/0x200 [ 137.522621] __mutex_lock+0x86/0x9b0 [ 137.522623] mutex_lock_interruptible_nested+0x1b/0x20 [ 137.522640] i915_mutex_lock_interruptible+0x51/0x130 [i915] [ 137.522657] i915_gem_fault+0x20b/0x720 [i915] [ 137.522660] __do_fault+0x1e/0x80 [ 137.522662] __handle_mm_fault+0xa08/0xed0 [ 137.522664] handle_mm_fault+0x156/0x300 [ 137.522666] __do_page_fault+0x2c5/0x570 [ 137.522668] do_page_fault+0x28/0x250 [ 137.522671] page_fault+0x22/0x30 [ 137.522672] -> #2 (&mm->mmap_sem){++++}: [ 137.522677] __lock_acquire+0x1420/0x15e0 [ 137.522679] lock_acquire+0xb0/0x200 [ 137.522682] down_read+0x3e/0x70 [ 137.522699] __i915_gem_userptr_get_pages_worker+0x141/0x240 [i915] [ 137.522701] process_one_work+0x233/0x660 [ 137.522704] worker_thread+0x4e/0x3b0 [ 137.522706] kthread+0x152/0x190 [ 137.522708] ret_from_fork+0x27/0x40 [ 137.522710] -> #1 ((&work->work)){+.+.}: [ 137.522714] __lock_acquire+0x1420/0x15e0 [ 137.522717] lock_acquire+0xb0/0x200 [ 137.522719] process_one_work+0x206/0x660 [ 137.522721] worker_thread+0x4e/0x3b0 [ 137.522723] kthread+0x152/0x190 [ 137.522725] ret_from_fork+0x27/0x40 [ 137.522727] -> #0 ("i915-userptr-acquire"){+.+.}: [ 137.522731] check_prev_add+0x430/0x840 [ 137.522733] __lock_acquire+0x1420/0x15e0 [ 137.522735] lock_acquire+0xb0/0x200 [ 137.522738] flush_workqueue+0xb4/0x540 [ 137.522740] drain_workqueue+0xd4/0x1b0 [ 137.522742] destroy_workqueue+0x1c/0x200 [ 137.522758] i915_gem_cleanup_userptr+0x15/0x20 [i915] [ 137.522770] i915_gem_fini+0x5f/0xc0 [i915] [ 137.522782] i915_driver_unload+0x122/0x180 [i915] [ 137.522794] i915_pci_remove+0x19/0x30 [i915] [ 137.522797] pci_device_remove+0x39/0xb0 [ 137.522800] device_release_driver_internal+0x15d/0x220 [ 137.522803] driver_detach+0x40/0x80 [ 137.522805] bus_remove_driver+0x58/0xd0 [ 137.522807] driver_unregister+0x2c/0x40 [ 137.522809] pci_unregister_driver+0x36/0xb0 [ 137.522828] i915_exit+0x1a/0x8b [i915] [ 137.522831] SyS_delete_module+0x18c/0x1e0 [ 137.522834] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 137.522835] other info that might help us debug this: [ 137.522838] Chain exists of: "i915-userptr-acquire" --> &mm->mmap_sem --> &dev->struct_mutex [ 137.522844] Possible unsafe locking scenario: [ 137.522846] CPU0 CPU1 [ 137.522848] ---- ---- [ 137.522850] lock(&dev->struct_mutex); [ 137.522852] lock(&mm->mmap_sem); [ 137.522854] lock(&dev->struct_mutex); [ 137.522857] lock("i915-userptr-acquire"); [ 137.522859] *** DEADLOCK *** [ 137.522862] 3 locks held by drv_module_relo/1532: [ 137.522864] #0: (&dev->mutex){....}, at: [] device_release_driver_internal+0x2b/0x220 [ 137.522869] #1: (&dev->mutex){....}, at: [] device_release_driver_internal+0x39/0x220 [ 137.522873] #2: (&dev->struct_mutex){+.+.}, at: [] i915_gem_fini+0x3f/0xc0 [i915] [ 137.522888] stack backtrace: [ 137.522891] CPU: 0 PID: 1532 Comm: drv_module_relo Tainted: G U 4.14.0-rc4-CI-CI_DRM_3209+ #1 [ 137.522894] Hardware name: /NUC7i5BNB, BIOS BNKBL357.86A.0048.2017.0704.1415 07/04/2017 [ 137.522897] Call Trace: [ 137.522900] dump_stack+0x68/0x9f [ 137.522902] print_circular_bug+0x235/0x3c0 [ 137.522905] ? lockdep_init_map_crosslock+0x20/0x20 [ 137.522908] check_prev_add+0x430/0x840 [ 137.522919] ? i915_gem_fini+0x5f/0xc0 [i915] [ 137.522922] ? __kernel_text_address+0x12/0x40 [ 137.522925] ? __save_stack_trace+0x66/0xd0 [ 137.522928] __lock_acquire+0x1420/0x15e0 [ 137.522930] ? __lock_acquire+0x1420/0x15e0 [ 137.522933] ? lockdep_init_map_crosslock+0x20/0x20 [ 137.522936] ? __this_cpu_preempt_check+0x13/0x20 [ 137.522938] lock_acquire+0xb0/0x200 [ 137.522940] ? flush_workqueue+0x91/0x540 [ 137.522943] flush_workqueue+0xb4/0x540 [ 137.522945] ? flush_workqueue+0x91/0x540 [ 137.522948] ? __mutex_unlock_slowpath+0x43/0x2c0 [ 137.522951] ? trace_hardirqs_on_caller+0xe3/0x1b0 [ 137.522954] drain_workqueue+0xd4/0x1b0 [ 137.522956] ? drain_workqueue+0xd4/0x1b0 [ 137.522958] destroy_workqueue+0x1c/0x200 [ 137.522975] i915_gem_cleanup_userptr+0x15/0x20 [i915] [ 137.522987] i915_gem_fini+0x5f/0xc0 [i915] [ 137.523000] i915_driver_unload+0x122/0x180 [i915] [ 137.523015] i915_pci_remove+0x19/0x30 [i915] [ 137.523018] pci_device_remove+0x39/0xb0 [ 137.523021] device_release_driver_internal+0x15d/0x220 [ 137.523023] driver_detach+0x40/0x80 [ 137.523026] bus_remove_driver+0x58/0xd0 [ 137.523028] driver_unregister+0x2c/0x40 [ 137.523030] pci_unregister_driver+0x36/0xb0 [ 137.523049] i915_exit+0x1a/0x8b [i915] [ 137.523052] SyS_delete_module+0x18c/0x1e0 [ 137.523055] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 137.523057] RIP: 0033:0x7f7bd0609287 [ 137.523059] RSP: 002b:00007ffef694bc18 EFLAGS: 00000246 ORIG_RAX: 00000000000000b0 [ 137.523062] RAX: ffffffffffffffda RBX: ffffffff81493f33 RCX: 00007f7bd0609287 [ 137.523065] RDX: 0000000000000001 RSI: 0000000000000800 RDI: 0000564f999f9fc8 [ 137.523067] RBP: ffffc90005c4ff88 R08: 0000000000000000 R09: 0000000000000080 [ 137.523069] R10: 00007f7bd20ef8c0 R11: 0000000000000246 R12: 0000000000000000 [ 137.523072] R13: 00007ffef694be00 R14: 0000000000000000 R15: 0000000000000000 [ 137.523075] ? __this_cpu_preempt_check+0x13/0x20 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20171011141857.14161-1-chris@chris-wilson.co.uk Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f1e651703764..3db5851756f0 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -615,9 +615,10 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv) intel_uc_fini_hw(dev_priv); i915_gem_cleanup_engines(dev_priv); i915_gem_contexts_fini(dev_priv); - i915_gem_cleanup_userptr(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); + i915_gem_cleanup_userptr(dev_priv); + i915_gem_drain_freed_objects(dev_priv); WARN_ON(!list_empty(&dev_priv->contexts.list)); From 612dde7ec306696d376d88d659f6287391d369a4 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Tue, 10 Oct 2017 17:33:55 +0300 Subject: [PATCH 144/145] drm/i915: Simplify intel_sanitize_enable_ppgtt Remove dead code around has_aliasing_ppgtt condition. Suggested-by: Colin Ian King Signed-off-by: Joonas Lahtinen Cc: Colin Ian King Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171010143355.16577-1-joonas.lahtinen@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ca7fd34fbe8b..daba55a4fce2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -135,11 +135,12 @@ static inline void i915_ggtt_invalidate(struct drm_i915_private *i915) int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, int enable_ppgtt) { - bool has_aliasing_ppgtt; bool has_full_ppgtt; bool has_full_48bit_ppgtt; - has_aliasing_ppgtt = dev_priv->info.has_aliasing_ppgtt; + if (!dev_priv->info.has_aliasing_ppgtt) + return 0; + has_full_ppgtt = dev_priv->info.has_full_ppgtt; has_full_48bit_ppgtt = dev_priv->info.has_full_48bit_ppgtt; @@ -149,9 +150,6 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, has_full_48bit_ppgtt = intel_vgpu_has_full_48bit_ppgtt(dev_priv); } - if (!has_aliasing_ppgtt) - return 0; - /* * We don't allow disabling PPGTT for gen9+ as it's a requirement for * execlists, the sole mechanism available to submit work. @@ -188,7 +186,7 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, return 2; } - return has_aliasing_ppgtt ? 1 : 0; + return 1; } static int ppgtt_bind_vma(struct i915_vma *vma, From fa9caf0b6e69703ff8a4d4da17897008ec2f2dd3 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 12 Oct 2017 21:05:11 +0300 Subject: [PATCH 145/145] drm/i915: Update DRIVER_DATE to 20171012 Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6bbc4b83aa0a..c7b2ca6aff05 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -80,8 +80,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20170929" -#define DRIVER_TIMESTAMP 1506682238 +#define DRIVER_DATE "20171012" +#define DRIVER_TIMESTAMP 1507831511 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions