diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 5182e3d5557d..66d23b619db1 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -47,6 +47,7 @@ i915-y += i915_cmd_parser.o \ i915_gem_tiling.o \ i915_gem_timeline.o \ i915_gem_userptr.o \ + i915_gemfs.o \ i915_trace_points.o \ i915_vma.o \ intel_breadcrumbs.o \ @@ -59,6 +60,8 @@ i915-y += i915_cmd_parser.o \ # general-purpose microcontroller (GuC) support i915-y += intel_uc.o \ + intel_uc_fw.o \ + intel_guc.o \ intel_guc_ct.o \ intel_guc_log.o \ intel_guc_loader.o \ diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index d5892d24f0b6..f6ded475bb2c 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -174,6 +174,7 @@ static int shadow_context_status_change(struct notifier_block *nb, atomic_set(&workload->shadow_ctx_active, 1); break; case INTEL_CONTEXT_SCHEDULE_OUT: + case INTEL_CONTEXT_SCHEDULE_PREEMPTED: atomic_set(&workload->shadow_ctx_active, 0); break; default: diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b4a6ac60e7c6..0bb6e01121fc 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -30,6 +30,7 @@ #include #include #include "intel_drv.h" +#include "i915_guc_submission.h" static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) { @@ -97,7 +98,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj) static char get_global_flag(struct drm_i915_gem_object *obj) { - return !list_empty(&obj->userfault_link) ? 'g' : ' '; + return obj->userfault_count ? 'g' : ' '; } static char get_pin_mapped_flag(struct drm_i915_gem_object *obj) @@ -118,6 +119,36 @@ static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj) return size; } +static const char * +stringify_page_sizes(unsigned int page_sizes, char *buf, size_t len) +{ + size_t x = 0; + + switch (page_sizes) { + case 0: + return ""; + case I915_GTT_PAGE_SIZE_4K: + return "4K"; + case I915_GTT_PAGE_SIZE_64K: + return "64K"; + case I915_GTT_PAGE_SIZE_2M: + return "2M"; + default: + if (!buf) + return "M"; + + if (page_sizes & I915_GTT_PAGE_SIZE_2M) + x += snprintf(buf + x, len - x, "2M, "); + if (page_sizes & I915_GTT_PAGE_SIZE_64K) + x += snprintf(buf + x, len - x, "64K, "); + if (page_sizes & I915_GTT_PAGE_SIZE_4K) + x += snprintf(buf + x, len - x, "4K, "); + buf[x-2] = '\0'; + + return buf; + } +} + static void describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) { @@ -155,9 +186,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) if (!drm_mm_node_allocated(&vma->node)) continue; - seq_printf(m, " (%sgtt offset: %08llx, size: %08llx", + seq_printf(m, " (%sgtt offset: %08llx, size: %08llx, pages: %s", i915_vma_is_ggtt(vma) ? "g" : "pp", - vma->node.start, vma->node.size); + vma->node.start, vma->node.size, + stringify_page_sizes(vma->page_sizes.gtt, NULL, 0)); if (i915_vma_is_ggtt(vma)) { switch (vma->ggtt_view.type) { case I915_GGTT_VIEW_NORMAL: @@ -402,10 +434,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data) struct drm_i915_private *dev_priv = node_to_i915(m->private); struct drm_device *dev = &dev_priv->drm; struct i915_ggtt *ggtt = &dev_priv->ggtt; - u32 count, mapped_count, purgeable_count, dpy_count; - u64 size, mapped_size, purgeable_size, dpy_size; + u32 count, mapped_count, purgeable_count, dpy_count, huge_count; + u64 size, mapped_size, purgeable_size, dpy_size, huge_size; struct drm_i915_gem_object *obj; + unsigned int page_sizes = 0; struct drm_file *file; + char buf[80]; int ret; ret = mutex_lock_interruptible(&dev->struct_mutex); @@ -419,6 +453,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) size = count = 0; mapped_size = mapped_count = 0; purgeable_size = purgeable_count = 0; + huge_size = huge_count = 0; list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) { size += obj->base.size; ++count; @@ -432,6 +467,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data) mapped_count++; mapped_size += obj->base.size; } + + if (obj->mm.page_sizes.sg > I915_GTT_PAGE_SIZE) { + huge_count++; + huge_size += obj->base.size; + page_sizes |= obj->mm.page_sizes.sg; + } } seq_printf(m, "%u unbound objects, %llu bytes\n", count, size); @@ -454,6 +495,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data) mapped_count++; mapped_size += obj->base.size; } + + if (obj->mm.page_sizes.sg > I915_GTT_PAGE_SIZE) { + huge_count++; + huge_size += obj->base.size; + page_sizes |= obj->mm.page_sizes.sg; + } } seq_printf(m, "%u bound objects, %llu bytes\n", count, size); @@ -461,11 +508,18 @@ static int i915_gem_object_info(struct seq_file *m, void *data) purgeable_count, purgeable_size); seq_printf(m, "%u mapped objects, %llu bytes\n", mapped_count, mapped_size); + seq_printf(m, "%u huge-paged objects (%s) %llu bytes\n", + huge_count, + stringify_page_sizes(page_sizes, buf, sizeof(buf)), + huge_size); seq_printf(m, "%u display objects (pinned), %llu bytes\n", dpy_count, dpy_size); seq_printf(m, "%llu [%llu] gtt total\n", ggtt->base.total, ggtt->mappable_end); + seq_printf(m, "Supported page sizes: %s\n", + stringify_page_sizes(INTEL_INFO(dev_priv)->page_sizes, + buf, sizeof(buf))); seq_putc(m, '\n'); print_batch_pool_stats(m, dev_priv); @@ -1026,6 +1080,7 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops, static int i915_frequency_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct intel_rps *rps = &dev_priv->gt_pm.rps; int ret = 0; intel_runtime_pm_get(dev_priv); @@ -1041,9 +1096,19 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "Current P-state: %d\n", (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT); } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - u32 freq_sts; + u32 rpmodectl, freq_sts; + + mutex_lock(&dev_priv->pcu_lock); + + rpmodectl = I915_READ(GEN6_RP_CONTROL); + seq_printf(m, "Video Turbo Mode: %s\n", + yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); + seq_printf(m, "HW control enabled: %s\n", + yesno(rpmodectl & GEN6_RP_ENABLE)); + seq_printf(m, "SW control enabled: %s\n", + yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == + GEN6_RP_MEDIA_SW_MODE)); - mutex_lock(&dev_priv->rps.hw_lock); freq_sts = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts); seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq); @@ -1052,21 +1117,21 @@ static int i915_frequency_info(struct seq_file *m, void *unused) intel_gpu_freq(dev_priv, (freq_sts >> 8) & 0xff)); seq_printf(m, "current GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq)); + intel_gpu_freq(dev_priv, rps->cur_freq)); seq_printf(m, "max GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); + intel_gpu_freq(dev_priv, rps->max_freq)); seq_printf(m, "min GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq)); + intel_gpu_freq(dev_priv, rps->min_freq)); seq_printf(m, "idle GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq)); + intel_gpu_freq(dev_priv, rps->idle_freq)); seq_printf(m, "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq)); - mutex_unlock(&dev_priv->rps.hw_lock); + intel_gpu_freq(dev_priv, rps->efficient_freq)); + mutex_unlock(&dev_priv->pcu_lock); } else if (INTEL_GEN(dev_priv) >= 6) { u32 rp_state_limits; u32 gt_perf_status; @@ -1136,10 +1201,17 @@ static int i915_frequency_info(struct seq_file *m, void *unused) pm_iir = I915_READ(GEN8_GT_IIR(2)); pm_mask = I915_READ(GEN6_PMINTRMSK); } + seq_printf(m, "Video Turbo Mode: %s\n", + yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); + seq_printf(m, "HW control enabled: %s\n", + yesno(rpmodectl & GEN6_RP_ENABLE)); + seq_printf(m, "SW control enabled: %s\n", + yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == + GEN6_RP_MEDIA_SW_MODE)); seq_printf(m, "PM IER=0x%08x IMR=0x%08x ISR=0x%08x IIR=0x%08x, MASK=0x%08x\n", pm_ier, pm_imr, pm_isr, pm_iir, pm_mask); seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n", - dev_priv->rps.pm_intrmsk_mbz); + rps->pm_intrmsk_mbz); seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); seq_printf(m, "Render p-state ratio: %d\n", (gt_perf_status & (INTEL_GEN(dev_priv) >= 9 ? 0x1ff00 : 0xff00)) >> 8); @@ -1159,8 +1231,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) rpcurup, GT_PM_INTERVAL_TO_US(dev_priv, rpcurup)); seq_printf(m, "RP PREV UP: %d (%dus)\n", rpprevup, GT_PM_INTERVAL_TO_US(dev_priv, rpprevup)); - seq_printf(m, "Up threshold: %d%%\n", - dev_priv->rps.up_threshold); + seq_printf(m, "Up threshold: %d%%\n", rps->up_threshold); seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n", rpdownei, GT_PM_INTERVAL_TO_US(dev_priv, rpdownei)); @@ -1168,8 +1239,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) rpcurdown, GT_PM_INTERVAL_TO_US(dev_priv, rpcurdown)); seq_printf(m, "RP PREV DOWN: %d (%dus)\n", rpprevdown, GT_PM_INTERVAL_TO_US(dev_priv, rpprevdown)); - seq_printf(m, "Down threshold: %d%%\n", - dev_priv->rps.down_threshold); + seq_printf(m, "Down threshold: %d%%\n", rps->down_threshold); max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; @@ -1191,22 +1261,22 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); + intel_gpu_freq(dev_priv, rps->max_freq)); seq_printf(m, "Current freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq)); + intel_gpu_freq(dev_priv, rps->cur_freq)); seq_printf(m, "Actual freq: %d MHz\n", cagf); seq_printf(m, "Idle freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq)); + intel_gpu_freq(dev_priv, rps->idle_freq)); seq_printf(m, "Min freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq)); + intel_gpu_freq(dev_priv, rps->min_freq)); seq_printf(m, "Boost freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq)); + intel_gpu_freq(dev_priv, rps->boost_freq)); seq_printf(m, "Max freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); + intel_gpu_freq(dev_priv, rps->max_freq)); seq_printf(m, "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq)); + intel_gpu_freq(dev_priv, rps->efficient_freq)); } else { seq_puts(m, "no P-state info available\n"); } @@ -1447,21 +1517,11 @@ static void print_rc6_res(struct seq_file *m, static int vlv_drpc_info(struct seq_file *m) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - u32 rpmodectl1, rcctl1, pw_status; + u32 rcctl1, pw_status; pw_status = I915_READ(VLV_GTLC_PW_STATUS); - rpmodectl1 = I915_READ(GEN6_RP_CONTROL); rcctl1 = I915_READ(GEN6_RC_CONTROL); - seq_printf(m, "Video Turbo Mode: %s\n", - yesno(rpmodectl1 & GEN6_RP_MEDIA_TURBO)); - seq_printf(m, "Turbo enabled: %s\n", - yesno(rpmodectl1 & GEN6_RP_ENABLE)); - seq_printf(m, "HW control enabled: %s\n", - yesno(rpmodectl1 & GEN6_RP_ENABLE)); - seq_printf(m, "SW control enabled: %s\n", - yesno((rpmodectl1 & GEN6_RP_MEDIA_MODE_MASK) == - GEN6_RP_MEDIA_SW_MODE)); seq_printf(m, "RC6 Enabled: %s\n", yesno(rcctl1 & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1)))); @@ -1479,7 +1539,7 @@ static int vlv_drpc_info(struct seq_file *m) static int gen6_drpc_info(struct seq_file *m) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - u32 rpmodectl1, gt_core_status, rcctl1, rc6vids = 0; + u32 gt_core_status, rcctl1, rc6vids = 0; u32 gen9_powergate_enable = 0, gen9_powergate_status = 0; unsigned forcewake_count; int count = 0; @@ -1498,24 +1558,16 @@ static int gen6_drpc_info(struct seq_file *m) gt_core_status = I915_READ_FW(GEN6_GT_CORE_STATUS); trace_i915_reg_rw(false, GEN6_GT_CORE_STATUS, gt_core_status, 4, true); - rpmodectl1 = I915_READ(GEN6_RP_CONTROL); rcctl1 = I915_READ(GEN6_RC_CONTROL); if (INTEL_GEN(dev_priv) >= 9) { gen9_powergate_enable = I915_READ(GEN9_PG_ENABLE); gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS); } - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); - seq_printf(m, "Video Turbo Mode: %s\n", - yesno(rpmodectl1 & GEN6_RP_MEDIA_TURBO)); - seq_printf(m, "HW control enabled: %s\n", - yesno(rpmodectl1 & GEN6_RP_ENABLE)); - seq_printf(m, "SW control enabled: %s\n", - yesno((rpmodectl1 & GEN6_RP_MEDIA_MODE_MASK) == - GEN6_RP_MEDIA_SW_MODE)); seq_printf(m, "RC1e Enabled: %s\n", yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE)); seq_printf(m, "RC6 Enabled: %s\n", @@ -1778,6 +1830,7 @@ static int i915_emon_status(struct seq_file *m, void *unused) static int i915_ring_freq_table(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct intel_rps *rps = &dev_priv->gt_pm.rps; int ret = 0; int gpu_freq, ia_freq; unsigned int max_gpu_freq, min_gpu_freq; @@ -1789,19 +1842,17 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); + ret = mutex_lock_interruptible(&dev_priv->pcu_lock); if (ret) goto out; if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { /* Convert GT frequency to 50 HZ units */ - min_gpu_freq = - dev_priv->rps.min_freq_softlimit / GEN9_FREQ_SCALER; - max_gpu_freq = - dev_priv->rps.max_freq_softlimit / GEN9_FREQ_SCALER; + min_gpu_freq = rps->min_freq_softlimit / GEN9_FREQ_SCALER; + max_gpu_freq = rps->max_freq_softlimit / GEN9_FREQ_SCALER; } else { - min_gpu_freq = dev_priv->rps.min_freq_softlimit; - max_gpu_freq = dev_priv->rps.max_freq_softlimit; + min_gpu_freq = rps->min_freq_softlimit; + max_gpu_freq = rps->max_freq_softlimit; } seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n"); @@ -1820,7 +1871,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) ((ia_freq >> 8) & 0xff) * 100); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); out: intel_runtime_pm_put(dev_priv); @@ -2254,25 +2305,26 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct drm_device *dev = &dev_priv->drm; + struct intel_rps *rps = &dev_priv->gt_pm.rps; struct drm_file *file; - seq_printf(m, "RPS enabled? %d\n", dev_priv->rps.enabled); + seq_printf(m, "RPS enabled? %d\n", rps->enabled); seq_printf(m, "GPU busy? %s [%d requests]\n", yesno(dev_priv->gt.awake), dev_priv->gt.active_requests); seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv)); seq_printf(m, "Boosts outstanding? %d\n", - atomic_read(&dev_priv->rps.num_waiters)); + atomic_read(&rps->num_waiters)); seq_printf(m, "Frequency requested %d\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq)); + intel_gpu_freq(dev_priv, rps->cur_freq)); seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n", - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit), - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit), - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); + intel_gpu_freq(dev_priv, rps->min_freq), + intel_gpu_freq(dev_priv, rps->min_freq_softlimit), + intel_gpu_freq(dev_priv, rps->max_freq_softlimit), + intel_gpu_freq(dev_priv, rps->max_freq)); seq_printf(m, " idle:%d, efficient:%d, boost:%d\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq), - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), - intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq)); + intel_gpu_freq(dev_priv, rps->idle_freq), + intel_gpu_freq(dev_priv, rps->efficient_freq), + intel_gpu_freq(dev_priv, rps->boost_freq)); mutex_lock(&dev->filelist_mutex); list_for_each_entry_reverse(file, &dev->filelist, lhead) { @@ -2284,15 +2336,15 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) seq_printf(m, "%s [%d]: %d boosts\n", task ? task->comm : "", task ? task->pid : -1, - atomic_read(&file_priv->rps.boosts)); + atomic_read(&file_priv->rps_client.boosts)); rcu_read_unlock(); } seq_printf(m, "Kernel (anonymous) boosts: %d\n", - atomic_read(&dev_priv->rps.boosts)); + atomic_read(&rps->boosts)); mutex_unlock(&dev->filelist_mutex); if (INTEL_GEN(dev_priv) >= 6 && - dev_priv->rps.enabled && + rps->enabled && dev_priv->gt.active_requests) { u32 rpup, rpupei; u32 rpdown, rpdownei; @@ -2305,13 +2357,13 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n", - rps_power_to_str(dev_priv->rps.power)); + rps_power_to_str(rps->power)); seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n", rpup && rpupei ? 100 * rpup / rpupei : 0, - dev_priv->rps.up_threshold); + rps->up_threshold); seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n", rpdown && rpdownei ? 100 * rpdown / rpdownei : 0, - dev_priv->rps.down_threshold); + rps->down_threshold); } else { seq_puts(m, "\nRPS Autotuning inactive\n"); } @@ -3238,9 +3290,9 @@ static int i915_display_info(struct seq_file *m, void *unused) static int i915_engine_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct i915_gpu_error *error = &dev_priv->gpu_error; struct intel_engine_cs *engine; enum intel_engine_id id; + struct drm_printer p; intel_runtime_pm_get(dev_priv); @@ -3249,149 +3301,9 @@ static int i915_engine_info(struct seq_file *m, void *unused) seq_printf(m, "Global active requests: %d\n", dev_priv->gt.active_requests); - for_each_engine(engine, dev_priv, id) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct drm_i915_gem_request *rq; - struct rb_node *rb; - u64 addr; - - seq_printf(m, "%s\n", engine->name); - seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", - intel_engine_get_seqno(engine), - intel_engine_last_submit(engine), - engine->hangcheck.seqno, - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), - engine->timeline->inflight_seqnos); - seq_printf(m, "\tReset count: %d\n", - i915_reset_engine_count(error, engine)); - - rcu_read_lock(); - - seq_printf(m, "\tRequests:\n"); - - rq = list_first_entry(&engine->timeline->requests, - struct drm_i915_gem_request, link); - if (&rq->link != &engine->timeline->requests) - print_request(m, rq, "\t\tfirst "); - - rq = list_last_entry(&engine->timeline->requests, - struct drm_i915_gem_request, link); - if (&rq->link != &engine->timeline->requests) - print_request(m, rq, "\t\tlast "); - - rq = i915_gem_find_active_request(engine); - if (rq) { - print_request(m, rq, "\t\tactive "); - seq_printf(m, - "\t\t[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]\n", - rq->head, rq->postfix, rq->tail, - rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, - rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u); - } - - seq_printf(m, "\tRING_START: 0x%08x [0x%08x]\n", - I915_READ(RING_START(engine->mmio_base)), - rq ? i915_ggtt_offset(rq->ring->vma) : 0); - seq_printf(m, "\tRING_HEAD: 0x%08x [0x%08x]\n", - I915_READ(RING_HEAD(engine->mmio_base)) & HEAD_ADDR, - rq ? rq->ring->head : 0); - seq_printf(m, "\tRING_TAIL: 0x%08x [0x%08x]\n", - I915_READ(RING_TAIL(engine->mmio_base)) & TAIL_ADDR, - rq ? rq->ring->tail : 0); - seq_printf(m, "\tRING_CTL: 0x%08x [%s]\n", - I915_READ(RING_CTL(engine->mmio_base)), - I915_READ(RING_CTL(engine->mmio_base)) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? "waiting" : ""); - - rcu_read_unlock(); - - addr = intel_engine_get_active_head(engine); - seq_printf(m, "\tACTHD: 0x%08x_%08x\n", - upper_32_bits(addr), lower_32_bits(addr)); - addr = intel_engine_get_last_batch_head(engine); - seq_printf(m, "\tBBADDR: 0x%08x_%08x\n", - upper_32_bits(addr), lower_32_bits(addr)); - - if (i915_modparams.enable_execlists) { - const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; - struct intel_engine_execlists * const execlists = &engine->execlists; - u32 ptr, read, write; - unsigned int idx; - - seq_printf(m, "\tExeclist status: 0x%08x %08x\n", - I915_READ(RING_EXECLIST_STATUS_LO(engine)), - I915_READ(RING_EXECLIST_STATUS_HI(engine))); - - ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine)); - read = GEN8_CSB_READ_PTR(ptr); - write = GEN8_CSB_WRITE_PTR(ptr); - seq_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s\n", - read, execlists->csb_head, - write, - intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)), - yesno(test_bit(ENGINE_IRQ_EXECLIST, - &engine->irq_posted))); - if (read >= GEN8_CSB_ENTRIES) - read = 0; - if (write >= GEN8_CSB_ENTRIES) - write = 0; - if (read > write) - write += GEN8_CSB_ENTRIES; - while (read < write) { - idx = ++read % GEN8_CSB_ENTRIES; - seq_printf(m, "\tExeclist CSB[%d]: 0x%08x [0x%08x in hwsp], context: %d [%d in hwsp]\n", - idx, - I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)), - hws[idx * 2], - I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx)), - hws[idx * 2 + 1]); - } - - rcu_read_lock(); - for (idx = 0; idx < execlists_num_ports(execlists); idx++) { - unsigned int count; - - rq = port_unpack(&execlists->port[idx], &count); - if (rq) { - seq_printf(m, "\t\tELSP[%d] count=%d, ", - idx, count); - print_request(m, rq, "rq: "); - } else { - seq_printf(m, "\t\tELSP[%d] idle\n", - idx); - } - } - rcu_read_unlock(); - - spin_lock_irq(&engine->timeline->lock); - for (rb = execlists->first; rb; rb = rb_next(rb)) { - struct i915_priolist *p = - rb_entry(rb, typeof(*p), node); - - list_for_each_entry(rq, &p->requests, - priotree.link) - print_request(m, rq, "\t\tQ "); - } - spin_unlock_irq(&engine->timeline->lock); - } else if (INTEL_GEN(dev_priv) > 6) { - seq_printf(m, "\tPP_DIR_BASE: 0x%08x\n", - I915_READ(RING_PP_DIR_BASE(engine))); - seq_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n", - I915_READ(RING_PP_DIR_BASE_READ(engine))); - seq_printf(m, "\tPP_DIR_DCLV: 0x%08x\n", - I915_READ(RING_PP_DIR_DCLV(engine))); - } - - spin_lock_irq(&b->rb_lock); - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = rb_entry(rb, typeof(*w), node); - - seq_printf(m, "\t%s [%d] waiting for %x\n", - w->tsk->comm, w->tsk->pid, w->seqno); - } - spin_unlock_irq(&b->rb_lock); - - seq_puts(m, "\n"); - } + p = drm_seq_file_printer(m); + for_each_engine(engine, dev_priv, id) + intel_engine_dump(engine, &p); intel_runtime_pm_put(dev_priv); @@ -4258,8 +4170,7 @@ fault_irq_set(struct drm_i915_private *i915, mutex_unlock(&i915->drm.struct_mutex); /* Flush idle worker to disarm irq */ - while (flush_delayed_work(&i915->gt.idle_work)) - ; + drain_delayed_work(&i915->gt.idle_work); return 0; @@ -4392,7 +4303,7 @@ i915_max_freq_get(void *data, u64 *val) if (INTEL_GEN(dev_priv) < 6) return -ENODEV; - *val = intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit); + *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.max_freq_softlimit); return 0; } @@ -4400,6 +4311,7 @@ static int i915_max_freq_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 hw_max, hw_min; int ret; @@ -4408,7 +4320,7 @@ i915_max_freq_set(void *data, u64 val) DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val); - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); + ret = mutex_lock_interruptible(&dev_priv->pcu_lock); if (ret) return ret; @@ -4417,20 +4329,20 @@ i915_max_freq_set(void *data, u64 val) */ val = intel_freq_opcode(dev_priv, val); - hw_max = dev_priv->rps.max_freq; - hw_min = dev_priv->rps.min_freq; + hw_max = rps->max_freq; + hw_min = rps->min_freq; - if (val < hw_min || val > hw_max || val < dev_priv->rps.min_freq_softlimit) { - mutex_unlock(&dev_priv->rps.hw_lock); + if (val < hw_min || val > hw_max || val < rps->min_freq_softlimit) { + mutex_unlock(&dev_priv->pcu_lock); return -EINVAL; } - dev_priv->rps.max_freq_softlimit = val; + rps->max_freq_softlimit = val; if (intel_set_rps(dev_priv, val)) DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); return 0; } @@ -4447,7 +4359,7 @@ i915_min_freq_get(void *data, u64 *val) if (INTEL_GEN(dev_priv) < 6) return -ENODEV; - *val = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit); + *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.min_freq_softlimit); return 0; } @@ -4455,6 +4367,7 @@ static int i915_min_freq_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 hw_max, hw_min; int ret; @@ -4463,7 +4376,7 @@ i915_min_freq_set(void *data, u64 val) DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val); - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); + ret = mutex_lock_interruptible(&dev_priv->pcu_lock); if (ret) return ret; @@ -4472,21 +4385,21 @@ i915_min_freq_set(void *data, u64 val) */ val = intel_freq_opcode(dev_priv, val); - hw_max = dev_priv->rps.max_freq; - hw_min = dev_priv->rps.min_freq; + hw_max = rps->max_freq; + hw_min = rps->min_freq; if (val < hw_min || - val > hw_max || val > dev_priv->rps.max_freq_softlimit) { - mutex_unlock(&dev_priv->rps.hw_lock); + val > hw_max || val > rps->max_freq_softlimit) { + mutex_unlock(&dev_priv->pcu_lock); return -EINVAL; } - dev_priv->rps.min_freq_softlimit = val; + rps->min_freq_softlimit = val; if (intel_set_rps(dev_priv, val)) DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 59ac9199b35d..3db5851756f0 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -367,9 +367,18 @@ static int i915_getparam(struct drm_device *dev, void *data, value = i915_gem_mmap_gtt_version(); break; case I915_PARAM_HAS_SCHEDULER: - value = dev_priv->engine[RCS] && - dev_priv->engine[RCS]->schedule; + value = 0; + if (dev_priv->engine[RCS] && dev_priv->engine[RCS]->schedule) { + value |= I915_SCHEDULER_CAP_ENABLED; + value |= I915_SCHEDULER_CAP_PRIORITY; + + if (INTEL_INFO(dev_priv)->has_logical_ring_preemption && + i915_modparams.enable_execlists && + !i915_modparams.enable_guc_submission) + value |= I915_SCHEDULER_CAP_PREEMPTION; + } break; + case I915_PARAM_MMAP_VERSION: /* Remember to bump this if the version changes! */ case I915_PARAM_HAS_GEM: @@ -606,9 +615,10 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv) intel_uc_fini_hw(dev_priv); i915_gem_cleanup_engines(dev_priv); i915_gem_contexts_fini(dev_priv); - i915_gem_cleanup_userptr(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); + i915_gem_cleanup_userptr(dev_priv); + i915_gem_drain_freed_objects(dev_priv); WARN_ON(!list_empty(&dev_priv->contexts.list)); @@ -1007,6 +1017,8 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv) intel_uncore_init(dev_priv); + intel_uc_init_mmio(dev_priv); + ret = intel_engines_init_mmio(dev_priv); if (ret) goto err_uncore; @@ -1580,7 +1592,7 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) intel_display_set_init_power(dev_priv, false); - fw_csr = !IS_GEN9_LP(dev_priv) && + fw_csr = !IS_GEN9_LP(dev_priv) && !hibernation && suspend_to_idle(dev_priv) && dev_priv->csr.dmc_payload; /* * In case of firmware assisted context save/restore don't manually @@ -2070,11 +2082,14 @@ static int i915_pm_resume(struct device *kdev) /* freeze: before creating the hibernation_image */ static int i915_pm_freeze(struct device *kdev) { + struct drm_device *dev = &kdev_to_i915(kdev)->drm; int ret; - ret = i915_pm_suspend(kdev); - if (ret) - return ret; + if (dev->switch_power_state != DRM_SWITCH_POWER_OFF) { + ret = i915_drm_suspend(dev); + if (ret) + return ret; + } ret = i915_gem_freeze(kdev_to_i915(kdev)); if (ret) @@ -2085,11 +2100,14 @@ static int i915_pm_freeze(struct device *kdev) static int i915_pm_freeze_late(struct device *kdev) { + struct drm_device *dev = &kdev_to_i915(kdev)->drm; int ret; - ret = i915_pm_suspend_late(kdev); - if (ret) - return ret; + if (dev->switch_power_state != DRM_SWITCH_POWER_OFF) { + ret = i915_drm_suspend_late(dev, true); + if (ret) + return ret; + } ret = i915_gem_freeze_late(kdev_to_i915(kdev)); if (ret) @@ -2485,7 +2503,7 @@ static int intel_runtime_suspend(struct device *kdev) struct drm_i915_private *dev_priv = to_i915(dev); int ret; - if (WARN_ON_ONCE(!(dev_priv->rps.enabled && intel_enable_rc6()))) + if (WARN_ON_ONCE(!(dev_priv->gt_pm.rc6.enabled && intel_rc6_enabled()))) return -ENODEV; if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev_priv))) @@ -2527,12 +2545,12 @@ static int intel_runtime_suspend(struct device *kdev) intel_uncore_suspend(dev_priv); enable_rpm_wakeref_asserts(dev_priv); - WARN_ON_ONCE(atomic_read(&dev_priv->pm.wakeref_count)); + WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count)); if (intel_uncore_arm_unclaimed_mmio_detection(dev_priv)) DRM_ERROR("Unclaimed access detected prior to suspending\n"); - dev_priv->pm.suspended = true; + dev_priv->runtime_pm.suspended = true; /* * FIXME: We really should find a document that references the arguments @@ -2578,11 +2596,11 @@ static int intel_runtime_resume(struct device *kdev) DRM_DEBUG_KMS("Resuming device\n"); - WARN_ON_ONCE(atomic_read(&dev_priv->pm.wakeref_count)); + WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count)); disable_rpm_wakeref_asserts(dev_priv); intel_opregion_notify_adapter(dev_priv, PCI_D0); - dev_priv->pm.suspended = false; + dev_priv->runtime_pm.suspended = false; if (intel_uncore_unclaimed_mmio(dev_priv)) DRM_DEBUG_DRIVER("Unclaimed access during suspend, bios?\n"); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7ca11318ac69..c7b2ca6aff05 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -80,8 +80,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20170929" -#define DRIVER_TIMESTAMP 1506682238 +#define DRIVER_DATE "20171012" +#define DRIVER_TIMESTAMP 1507831511 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions @@ -609,7 +609,7 @@ struct drm_i915_file_private { struct intel_rps_client { atomic_t boosts; - } rps; + } rps_client; unsigned int bsd_engine; @@ -783,6 +783,7 @@ struct intel_csr { func(has_l3_dpf); \ func(has_llc); \ func(has_logical_ring_contexts); \ + func(has_logical_ring_preemption); \ func(has_overlay); \ func(has_pipe_cxsr); \ func(has_pooled_eu); \ @@ -868,6 +869,8 @@ struct intel_device_info { u8 num_sprites[I915_MAX_PIPES]; u8 num_scalers[I915_MAX_PIPES]; + unsigned int page_sizes; /* page sizes supported by the HW */ + #define DEFINE_FLAG(name) u8 name:1 DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); #undef DEFINE_FLAG @@ -981,6 +984,7 @@ struct i915_gpu_state { pid_t pid; u32 handle; u32 hw_id; + int priority; int ban_score; int active; int guilty; @@ -1003,6 +1007,7 @@ struct i915_gpu_state { long jiffies; pid_t pid; u32 context; + int priority; int ban_score; u32 seqno; u32 head; @@ -1312,7 +1317,7 @@ struct intel_rps_ei { u32 media_c0; }; -struct intel_gen6_power_mgmt { +struct intel_rps { /* * work, interrupts_enabled and pm_iir are protected by * dev_priv->irq_lock @@ -1353,20 +1358,26 @@ struct intel_gen6_power_mgmt { enum { LOW_POWER, BETWEEN, HIGH_POWER } power; bool enabled; - struct delayed_work autoenable_work; atomic_t num_waiters; atomic_t boosts; /* manual wa residency calculations */ struct intel_rps_ei ei; +}; - /* - * Protects RPS/RC6 register access and PCU communication. - * Must be taken after struct_mutex if nested. Note that - * this lock may be held for long periods of time when - * talking to hw - so only take it when talking to hw! - */ - struct mutex hw_lock; +struct intel_rc6 { + bool enabled; +}; + +struct intel_llc_pstate { + bool enabled; +}; + +struct intel_gen6_power_mgmt { + struct intel_rps rps; + struct intel_rc6 rc6; + struct intel_llc_pstate llc_pstate; + struct delayed_work autoenable_work; }; /* defined intel_pm.c */ @@ -1508,6 +1519,11 @@ struct i915_gem_mm { /** Usable portion of the GTT for GEM */ dma_addr_t stolen_base; /* limited to low memory (32-bit) */ + /** + * tmpfs instance used for shmem backed objects + */ + struct vfsmount *gemfs; + /** PPGTT used for aliasing the PPGTT with the GTT */ struct i915_hw_ppgtt *aliasing_ppgtt; @@ -2251,8 +2267,11 @@ struct drm_i915_private { wait_queue_head_t gmbus_wait_queue; struct pci_dev *bridge_dev; - struct i915_gem_context *kernel_context; struct intel_engine_cs *engine[I915_NUM_ENGINES]; + /* Context used internally to idle the GPU and setup initial state */ + struct i915_gem_context *kernel_context; + /* Context only to be used for injecting preemption commands */ + struct i915_gem_context *preempt_context; struct i915_vma *semaphore; struct drm_dma_handle *status_page_dmah; @@ -2408,8 +2427,16 @@ struct drm_i915_private { /* Cannot be determined by PCIID. You must always read a register. */ u32 edram_cap; - /* gen6+ rps state */ - struct intel_gen6_power_mgmt rps; + /* + * Protects RPS/RC6 register access and PCU communication. + * Must be taken after struct_mutex if nested. Note that + * this lock may be held for long periods of time when + * talking to hw - so only take it when talking to hw! + */ + struct mutex pcu_lock; + + /* gen6+ GT PM state */ + struct intel_gen6_power_mgmt gt_pm; /* ilk-only ips/rps state. Everything in here is protected by the global * mchdev_lock in intel_pm.c */ @@ -2520,7 +2547,7 @@ struct drm_i915_private { bool distrust_bios_wm; } wm; - struct i915_runtime_pm pm; + struct i915_runtime_pm runtime_pm; struct { bool initialized; @@ -2859,6 +2886,21 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg) (((__iter).curr += PAGE_SIZE) >= (__iter).max) ? \ (__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0 : 0) +static inline unsigned int i915_sg_page_sizes(struct scatterlist *sg) +{ + unsigned int page_sizes; + + page_sizes = 0; + while (sg) { + GEM_BUG_ON(sg->offset); + GEM_BUG_ON(!IS_ALIGNED(sg->length, PAGE_SIZE)); + page_sizes |= sg->length; + sg = __sg_next(sg); + } + + return page_sizes; +} + static inline unsigned int i915_sg_segment_size(void) { unsigned int size = swiotlb_max_segment(); @@ -3088,6 +3130,10 @@ intel_info(const struct drm_i915_private *dev_priv) #define USES_PPGTT(dev_priv) (i915_modparams.enable_ppgtt) #define USES_FULL_PPGTT(dev_priv) (i915_modparams.enable_ppgtt >= 2) #define USES_FULL_48BIT_PPGTT(dev_priv) (i915_modparams.enable_ppgtt == 3) +#define HAS_PAGE_SIZES(dev_priv, sizes) ({ \ + GEM_BUG_ON((sizes) == 0); \ + ((sizes) & ~(dev_priv)->info.page_sizes) == 0; \ +}) #define HAS_OVERLAY(dev_priv) ((dev_priv)->info.has_overlay) #define OVERLAY_NEEDS_PHYSICAL(dev_priv) \ @@ -3504,7 +3550,8 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, unsigned long n); void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages); + struct sg_table *pages, + unsigned int sg_page_sizes); int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj); static inline int __must_check @@ -3726,8 +3773,6 @@ i915_vm_to_ppgtt(struct i915_address_space *vm) } /* i915_gem_fence_reg.c */ -int __must_check i915_vma_get_fence(struct i915_vma *vma); -int __must_check i915_vma_put_fence(struct i915_vma *vma); struct drm_i915_fence_reg * i915_reserve_fence(struct drm_i915_private *dev_priv); void i915_unreserve_fence(struct drm_i915_fence_reg *fence); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 73eeb6b1f1cd..20fcac37c85a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -35,6 +35,7 @@ #include "intel_drv.h" #include "intel_frontbuffer.h" #include "intel_mocs.h" +#include "i915_gemfs.h" #include #include #include @@ -161,8 +162,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, return 0; } -static struct sg_table * -i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) { struct address_space *mapping = obj->base.filp->f_mapping; drm_dma_handle_t *phys; @@ -170,9 +170,10 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) struct scatterlist *sg; char *vaddr; int i; + int err; if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) - return ERR_PTR(-EINVAL); + return -EINVAL; /* Always aligning to the object size, allows a single allocation * to handle all possible callers, and given typical object sizes, @@ -182,7 +183,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) roundup_pow_of_two(obj->base.size), roundup_pow_of_two(obj->base.size)); if (!phys) - return ERR_PTR(-ENOMEM); + return -ENOMEM; vaddr = phys->vaddr; for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { @@ -191,7 +192,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) page = shmem_read_mapping_page(mapping, i); if (IS_ERR(page)) { - st = ERR_CAST(page); + err = PTR_ERR(page); goto err_phys; } @@ -208,13 +209,13 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) st = kmalloc(sizeof(*st), GFP_KERNEL); if (!st) { - st = ERR_PTR(-ENOMEM); + err = -ENOMEM; goto err_phys; } if (sg_alloc_table(st, 1, GFP_KERNEL)) { kfree(st); - st = ERR_PTR(-ENOMEM); + err = -ENOMEM; goto err_phys; } @@ -226,11 +227,15 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) sg_dma_len(sg) = obj->base.size; obj->phys_handle = phys; - return st; + + __i915_gem_object_set_pages(obj, st, sg->length); + + return 0; err_phys: drm_pci_free(obj->base.dev, phys); - return st; + + return err; } static void __start_cpu_write(struct drm_i915_gem_object *obj) @@ -353,7 +358,7 @@ static long i915_gem_object_wait_fence(struct dma_fence *fence, unsigned int flags, long timeout, - struct intel_rps_client *rps) + struct intel_rps_client *rps_client) { struct drm_i915_gem_request *rq; @@ -386,11 +391,11 @@ i915_gem_object_wait_fence(struct dma_fence *fence, * forcing the clocks too high for the whole system, we only allow * each client to waitboost once in a busy period. */ - if (rps) { + if (rps_client) { if (INTEL_GEN(rq->i915) >= 6) - gen6_rps_boost(rq, rps); + gen6_rps_boost(rq, rps_client); else - rps = NULL; + rps_client = NULL; } timeout = i915_wait_request(rq, flags, timeout); @@ -406,7 +411,7 @@ static long i915_gem_object_wait_reservation(struct reservation_object *resv, unsigned int flags, long timeout, - struct intel_rps_client *rps) + struct intel_rps_client *rps_client) { unsigned int seq = __read_seqcount_begin(&resv->seq); struct dma_fence *excl; @@ -425,7 +430,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, for (i = 0; i < count; i++) { timeout = i915_gem_object_wait_fence(shared[i], flags, timeout, - rps); + rps_client); if (timeout < 0) break; @@ -442,7 +447,8 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, } if (excl && timeout >= 0) { - timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps); + timeout = i915_gem_object_wait_fence(excl, flags, timeout, + rps_client); prune_fences = timeout >= 0; } @@ -538,7 +544,7 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj, unsigned int flags, long timeout, - struct intel_rps_client *rps) + struct intel_rps_client *rps_client) { might_sleep(); #if IS_ENABLED(CONFIG_LOCKDEP) @@ -550,7 +556,7 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj, timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout, - rps); + rps_client); return timeout < 0 ? timeout : 0; } @@ -558,7 +564,7 @@ static struct intel_rps_client *to_rps_client(struct drm_file *file) { struct drm_i915_file_private *fpriv = file->driver_priv; - return &fpriv->rps; + return &fpriv->rps_client; } static int @@ -1050,7 +1056,9 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, intel_runtime_pm_get(i915); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | PIN_NONBLOCK); + PIN_MAPPABLE | + PIN_NONFAULT | + PIN_NONBLOCK); if (!IS_ERR(vma)) { node.start = i915_ggtt_offset(vma); node.allocated = false; @@ -1234,7 +1242,9 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, intel_runtime_pm_get(i915); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | PIN_NONBLOCK); + PIN_MAPPABLE | + PIN_NONFAULT | + PIN_NONBLOCK); if (!IS_ERR(vma)) { node.start = i915_ggtt_offset(vma); node.allocated = false; @@ -1905,22 +1915,27 @@ int i915_gem_fault(struct vm_fault *vmf) if (ret) goto err_unpin; - ret = i915_vma_get_fence(vma); + ret = i915_vma_pin_fence(vma); if (ret) goto err_unpin; - /* Mark as being mmapped into userspace for later revocation */ - assert_rpm_wakelock_held(dev_priv); - if (list_empty(&obj->userfault_link)) - list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); - /* Finally, remap it using the new GTT offset */ ret = remap_io_mapping(area, area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT, min_t(u64, vma->size, area->vm_end - area->vm_start), &ggtt->mappable); + if (ret) + goto err_fence; + /* Mark as being mmapped into userspace for later revocation */ + assert_rpm_wakelock_held(dev_priv); + if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) + list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); + GEM_BUG_ON(!obj->userfault_count); + +err_fence: + i915_vma_unpin_fence(vma); err_unpin: __i915_vma_unpin(vma); err_unlock: @@ -1972,6 +1987,25 @@ err: return ret; } +static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) +{ + struct i915_vma *vma; + + GEM_BUG_ON(!obj->userfault_count); + + obj->userfault_count = 0; + list_del(&obj->userfault_link); + drm_vma_node_unmap(&obj->base.vma_node, + obj->base.dev->anon_inode->i_mapping); + + list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!i915_vma_is_ggtt(vma)) + break; + + i915_vma_unset_userfault(vma); + } +} + /** * i915_gem_release_mmap - remove physical page mappings * @obj: obj in question @@ -2002,12 +2036,10 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) lockdep_assert_held(&i915->drm.struct_mutex); intel_runtime_pm_get(i915); - if (list_empty(&obj->userfault_link)) + if (!obj->userfault_count) goto out; - list_del_init(&obj->userfault_link); - drm_vma_node_unmap(&obj->base.vma_node, - obj->base.dev->anon_inode->i_mapping); + __i915_gem_object_release_mmap(obj); /* Ensure that the CPU's PTE are revoked and there are not outstanding * memory transactions from userspace before we return. The TLB @@ -2035,11 +2067,8 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) */ list_for_each_entry_safe(obj, on, - &dev_priv->mm.userfault_list, userfault_link) { - list_del_init(&obj->userfault_link); - drm_vma_node_unmap(&obj->base.vma_node, - obj->base.dev->anon_inode->i_mapping); - } + &dev_priv->mm.userfault_list, userfault_link) + __i915_gem_object_release_mmap(obj); /* The fence will be lost when the device powers down. If any were * in use by hardware (i.e. they are pinned), we should not be powering @@ -2062,7 +2091,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) if (!reg->vma) continue; - GEM_BUG_ON(!list_empty(®->vma->obj->userfault_link)); + GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); reg->dirty = true; } } @@ -2261,6 +2290,8 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, if (!IS_ERR(pages)) obj->ops->put_pages(obj, pages); + obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; + unlock: mutex_unlock(&obj->mm.lock); } @@ -2291,8 +2322,7 @@ static bool i915_sg_trim(struct sg_table *orig_st) return true; } -static struct sg_table * -i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); const unsigned long page_count = obj->base.size / PAGE_SIZE; @@ -2304,6 +2334,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) struct page *page; unsigned long last_pfn = 0; /* suppress gcc warning */ unsigned int max_segment = i915_sg_segment_size(); + unsigned int sg_page_sizes; gfp_t noreclaim; int ret; @@ -2316,12 +2347,12 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) st = kmalloc(sizeof(*st), GFP_KERNEL); if (st == NULL) - return ERR_PTR(-ENOMEM); + return -ENOMEM; rebuild_st: if (sg_alloc_table(st, page_count, GFP_KERNEL)) { kfree(st); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } /* Get the list of pages out of our struct file. They'll be pinned @@ -2335,6 +2366,7 @@ rebuild_st: sg = st->sgl; st->nents = 0; + sg_page_sizes = 0; for (i = 0; i < page_count; i++) { const unsigned int shrink[] = { I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, @@ -2387,8 +2419,10 @@ rebuild_st: if (!i || sg->length >= max_segment || page_to_pfn(page) != last_pfn + 1) { - if (i) + if (i) { + sg_page_sizes |= sg->length; sg = sg_next(sg); + } st->nents++; sg_set_page(sg, page, PAGE_SIZE, 0); } else { @@ -2399,8 +2433,10 @@ rebuild_st: /* Check that the i965g/gm workaround works. */ WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); } - if (sg) /* loop terminated early; short sg table */ + if (sg) { /* loop terminated early; short sg table */ + sg_page_sizes |= sg->length; sg_mark_end(sg); + } /* Trim unused sg entries to avoid wasting memory. */ i915_sg_trim(st); @@ -2429,7 +2465,9 @@ rebuild_st: if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_do_bit_17_swizzle(obj, st); - return st; + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; err_sg: sg_mark_end(sg); @@ -2450,12 +2488,17 @@ err_pages: if (ret == -ENOSPC) ret = -ENOMEM; - return ERR_PTR(ret); + return ret; } void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages) + struct sg_table *pages, + unsigned int sg_page_sizes) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned long supported = INTEL_INFO(i915)->page_sizes; + int i; + lockdep_assert_held(&obj->mm.lock); obj->mm.get_page.sg_pos = pages->sgl; @@ -2469,23 +2512,40 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, __i915_gem_object_pin_pages(obj); obj->mm.quirked = true; } + + GEM_BUG_ON(!sg_page_sizes); + obj->mm.page_sizes.phys = sg_page_sizes; + + /* + * Calculate the supported page-sizes which fit into the given + * sg_page_sizes. This will give us the page-sizes which we may be able + * to use opportunistically when later inserting into the GTT. For + * example if phys=2G, then in theory we should be able to use 1G, 2M, + * 64K or 4K pages, although in practice this will depend on a number of + * other factors. + */ + obj->mm.page_sizes.sg = 0; + for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { + if (obj->mm.page_sizes.phys & ~0u << i) + obj->mm.page_sizes.sg |= BIT(i); + } + + GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); } static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) { - struct sg_table *pages; + int err; if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { DRM_DEBUG("Attempting to obtain a purgeable object\n"); return -EFAULT; } - pages = obj->ops->get_pages(obj); - if (unlikely(IS_ERR(pages))) - return PTR_ERR(pages); + err = obj->ops->get_pages(obj); + GEM_BUG_ON(!err && IS_ERR_OR_NULL(obj->mm.pages)); - __i915_gem_object_set_pages(obj, pages); - return 0; + return err; } /* Ensure that the associated pages are gathered from the backing storage @@ -2796,7 +2856,17 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request = NULL; - /* Prevent the signaler thread from updating the request + /* + * During the reset sequence, we must prevent the engine from + * entering RC6. As the context state is undefined until we restart + * the engine, if it does enter RC6 during the reset, the state + * written to the powercontext is undefined and so we may lose + * GPU state upon resume, i.e. fail to restart after a reset. + */ + intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); + + /* + * Prevent the signaler thread from updating the request * state (by calling dma_fence_signal) as we are processing * the reset. The write from the GPU of the seqno is * asynchronous and the signaler thread may see a different @@ -2807,7 +2877,8 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) */ kthread_park(engine->breadcrumbs.signaler); - /* Prevent request submission to the hardware until we have + /* + * Prevent request submission to the hardware until we have * completed the reset in i915_gem_reset_finish(). If a request * is completed by one engine, it may then queue a request * to a second via its engine->irq_tasklet *just* as we are @@ -2997,6 +3068,8 @@ void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) { tasklet_enable(&engine->execlists.irq_tasklet); kthread_unpark(engine->breadcrumbs.signaler); + + intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); } void i915_gem_reset_finish(struct drm_i915_private *dev_priv) @@ -3016,49 +3089,76 @@ static void nop_submit_request(struct drm_i915_gem_request *request) { GEM_BUG_ON(!i915_terminally_wedged(&request->i915->gpu_error)); dma_fence_set_error(&request->fence, -EIO); + i915_gem_request_submit(request); +} + +static void nop_complete_submit_request(struct drm_i915_gem_request *request) +{ + unsigned long flags; + + GEM_BUG_ON(!i915_terminally_wedged(&request->i915->gpu_error)); + dma_fence_set_error(&request->fence, -EIO); + + spin_lock_irqsave(&request->engine->timeline->lock, flags); + __i915_gem_request_submit(request); intel_engine_init_global_seqno(request->engine, request->global_seqno); + spin_unlock_irqrestore(&request->engine->timeline->lock, flags); } -static void engine_set_wedged(struct intel_engine_cs *engine) +void i915_gem_set_wedged(struct drm_i915_private *i915) { - /* We need to be sure that no thread is running the old callback as - * we install the nop handler (otherwise we would submit a request - * to hardware that will never complete). In order to prevent this - * race, we wait until the machine is idle before making the swap - * (using stop_machine()). - */ - engine->submit_request = nop_submit_request; - - /* Mark all executing requests as skipped */ - engine->cancel_requests(engine); - - /* Mark all pending requests as complete so that any concurrent - * (lockless) lookup doesn't try and wait upon the request as we - * reset it. - */ - intel_engine_init_global_seqno(engine, - intel_engine_last_submit(engine)); -} - -static int __i915_gem_set_wedged_BKL(void *data) -{ - struct drm_i915_private *i915 = data; struct intel_engine_cs *engine; enum intel_engine_id id; + /* + * First, stop submission to hw, but do not yet complete requests by + * rolling the global seqno forward (since this would complete requests + * for which we haven't set the fence error to EIO yet). + */ for_each_engine(engine, i915, id) - engine_set_wedged(engine); + engine->submit_request = nop_submit_request; + + /* + * Make sure no one is running the old callback before we proceed with + * cancelling requests and resetting the completion tracking. Otherwise + * we might submit a request to the hardware which never completes. + */ + synchronize_rcu(); + + for_each_engine(engine, i915, id) { + /* Mark all executing requests as skipped */ + engine->cancel_requests(engine); + + /* + * Only once we've force-cancelled all in-flight requests can we + * start to complete all requests. + */ + engine->submit_request = nop_complete_submit_request; + } + + /* + * Make sure no request can slip through without getting completed by + * either this call here to intel_engine_init_global_seqno, or the one + * in nop_complete_submit_request. + */ + synchronize_rcu(); + + for_each_engine(engine, i915, id) { + unsigned long flags; + + /* Mark all pending requests as complete so that any concurrent + * (lockless) lookup doesn't try and wait upon the request as we + * reset it. + */ + spin_lock_irqsave(&engine->timeline->lock, flags); + intel_engine_init_global_seqno(engine, + intel_engine_last_submit(engine)); + spin_unlock_irqrestore(&engine->timeline->lock, flags); + } set_bit(I915_WEDGED, &i915->gpu_error.flags); wake_up_all(&i915->gpu_error.reset_queue); - - return 0; -} - -void i915_gem_set_wedged(struct drm_i915_private *dev_priv) -{ - stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL); } bool i915_gem_unset_wedged(struct drm_i915_private *i915) @@ -3959,42 +4059,47 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->base.dev->struct_mutex); + if (!view && flags & PIN_MAPPABLE) { + /* If the required space is larger than the available + * aperture, we will not able to find a slot for the + * object and unbinding the object now will be in + * vain. Worse, doing so may cause us to ping-pong + * the object in and out of the Global GTT and + * waste a lot of cycles under the mutex. + */ + if (obj->base.size > dev_priv->ggtt.mappable_end) + return ERR_PTR(-E2BIG); + + /* If NONBLOCK is set the caller is optimistically + * trying to cache the full object within the mappable + * aperture, and *must* have a fallback in place for + * situations where we cannot bind the object. We + * can be a little more lax here and use the fallback + * more often to avoid costly migrations of ourselves + * and other objects within the aperture. + * + * Half-the-aperture is used as a simple heuristic. + * More interesting would to do search for a free + * block prior to making the commitment to unbind. + * That caters for the self-harm case, and with a + * little more heuristics (e.g. NOFAULT, NOEVICT) + * we could try to minimise harm to others. + */ + if (flags & PIN_NONBLOCK && + obj->base.size > dev_priv->ggtt.mappable_end / 2) + return ERR_PTR(-ENOSPC); + } + vma = i915_vma_instance(obj, vm, view); if (unlikely(IS_ERR(vma))) return vma; if (i915_vma_misplaced(vma, size, alignment, flags)) { - if (flags & PIN_NONBLOCK && - (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))) - return ERR_PTR(-ENOSPC); + if (flags & PIN_NONBLOCK) { + if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) + return ERR_PTR(-ENOSPC); - if (flags & PIN_MAPPABLE) { - /* If the required space is larger than the available - * aperture, we will not able to find a slot for the - * object and unbinding the object now will be in - * vain. Worse, doing so may cause us to ping-pong - * the object in and out of the Global GTT and - * waste a lot of cycles under the mutex. - */ - if (vma->fence_size > dev_priv->ggtt.mappable_end) - return ERR_PTR(-E2BIG); - - /* If NONBLOCK is set the caller is optimistically - * trying to cache the full object within the mappable - * aperture, and *must* have a fallback in place for - * situations where we cannot bind the object. We - * can be a little more lax here and use the fallback - * more often to avoid costly migrations of ourselves - * and other objects within the aperture. - * - * Half-the-aperture is used as a simple heuristic. - * More interesting would to do search for a free - * block prior to making the commitment to unbind. - * That caters for the self-harm case, and with a - * little more heuristics (e.g. NOFAULT, NOEVICT) - * we could try to minimise harm to others. - */ - if (flags & PIN_NONBLOCK && + if (flags & PIN_MAPPABLE && vma->fence_size > dev_priv->ggtt.mappable_end / 2) return ERR_PTR(-ENOSPC); } @@ -4221,7 +4326,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, mutex_init(&obj->mm.lock); INIT_LIST_HEAD(&obj->global_link); - INIT_LIST_HEAD(&obj->userfault_link); INIT_LIST_HEAD(&obj->vma_list); INIT_LIST_HEAD(&obj->lut_list); INIT_LIST_HEAD(&obj->batch_pool_link); @@ -4251,6 +4355,30 @@ static const struct drm_i915_gem_object_ops i915_gem_object_ops = { .pwrite = i915_gem_object_pwrite_gtt, }; +static int i915_gem_object_create_shmem(struct drm_device *dev, + struct drm_gem_object *obj, + size_t size) +{ + struct drm_i915_private *i915 = to_i915(dev); + unsigned long flags = VM_NORESERVE; + struct file *filp; + + drm_gem_private_object_init(dev, obj, size); + + if (i915->mm.gemfs) + filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, + flags); + else + filp = shmem_file_setup("i915", size, flags); + + if (IS_ERR(filp)) + return PTR_ERR(filp); + + obj->filp = filp; + + return 0; +} + struct drm_i915_gem_object * i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) { @@ -4275,7 +4403,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) if (obj == NULL) return ERR_PTR(-ENOMEM); - ret = drm_gem_object_init(&dev_priv->drm, &obj->base, size); + ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size); if (ret) goto fail; @@ -4378,6 +4506,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, llist_for_each_entry_safe(obj, on, freed, freed) { GEM_BUG_ON(obj->bind_count); + GEM_BUG_ON(obj->userfault_count); GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); GEM_BUG_ON(!list_empty(&obj->lut_list)); @@ -4547,8 +4676,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) /* As the idle_work is rearming if it detects a race, play safe and * repeat the flush until it is definitely idle. */ - while (flush_delayed_work(&dev_priv->gt.idle_work)) - ; + drain_delayed_work(&dev_priv->gt.idle_work); /* Assert that we sucessfully flushed all the work and * reset the GPU back to its idle, low power state. @@ -4595,6 +4723,7 @@ void i915_gem_resume(struct drm_i915_private *dev_priv) mutex_lock(&dev->struct_mutex); i915_gem_restore_gtt_mappings(dev_priv); + i915_gem_restore_fences(dev_priv); /* As we didn't flush the kernel context before suspend, we cannot * guarantee that the context image is complete. So let's just reset @@ -4757,6 +4886,15 @@ int i915_gem_init(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); + /* + * We need to fallback to 4K pages since gvt gtt handling doesn't + * support huge page entries - we will need to check either hypervisor + * mm can support huge guest page or just do emulation in gvt. + */ + if (intel_vgpu_active(dev_priv)) + mkwrite_device_info(dev_priv)->page_sizes = + I915_GTT_PAGE_SIZE_4K; + dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); if (!i915_modparams.enable_execlists) { @@ -4914,6 +5052,10 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) spin_lock_init(&dev_priv->fb_tracking.lock); + err = i915_gemfs_init(dev_priv); + if (err) + DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); + return 0; err_priorities: @@ -4952,6 +5094,8 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ rcu_barrier(); + + i915_gemfs_fini(dev_priv); } int i915_gem_freeze(struct drm_i915_private *dev_priv) @@ -5341,6 +5485,7 @@ err_unlock: #include "selftests/scatterlist.c" #include "selftests/mock_gem_device.c" #include "selftests/huge_gem_object.c" +#include "selftests/huge_pages.c" #include "selftests/i915_gem_object.c" #include "selftests/i915_gem_coherency.c" #endif diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 921ee369c74d..5bf96a258509 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -416,14 +416,43 @@ out: return ctx; } -int i915_gem_contexts_init(struct drm_i915_private *dev_priv) +static struct i915_gem_context * +create_kernel_context(struct drm_i915_private *i915, int prio) { struct i915_gem_context *ctx; - /* Init should only be called once per module load. Eventually the - * restriction on the context_disabled check can be loosened. */ - if (WARN_ON(dev_priv->kernel_context)) - return 0; + ctx = i915_gem_create_context(i915, NULL); + if (IS_ERR(ctx)) + return ctx; + + i915_gem_context_clear_bannable(ctx); + ctx->priority = prio; + ctx->ring_size = PAGE_SIZE; + + GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); + + return ctx; +} + +static void +destroy_kernel_context(struct i915_gem_context **ctxp) +{ + struct i915_gem_context *ctx; + + /* Keep the context ref so that we can free it immediately ourselves */ + ctx = i915_gem_context_get(fetch_and_zero(ctxp)); + GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); + + context_close(ctx); + i915_gem_context_free(ctx); +} + +int i915_gem_contexts_init(struct drm_i915_private *dev_priv) +{ + struct i915_gem_context *ctx; + int err; + + GEM_BUG_ON(dev_priv->kernel_context); INIT_LIST_HEAD(&dev_priv->contexts.list); INIT_WORK(&dev_priv->contexts.free_work, contexts_free_worker); @@ -441,28 +470,38 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); ida_init(&dev_priv->contexts.hw_ida); - ctx = i915_gem_create_context(dev_priv, NULL); + /* lowest priority; idle task */ + ctx = create_kernel_context(dev_priv, I915_PRIORITY_MIN); if (IS_ERR(ctx)) { - DRM_ERROR("Failed to create default global context (error %ld)\n", - PTR_ERR(ctx)); - return PTR_ERR(ctx); + DRM_ERROR("Failed to create default global context\n"); + err = PTR_ERR(ctx); + goto err; } - - /* For easy recognisablity, we want the kernel context to be 0 and then + /* + * For easy recognisablity, we want the kernel context to be 0 and then * all user contexts will have non-zero hw_id. */ GEM_BUG_ON(ctx->hw_id); - - i915_gem_context_clear_bannable(ctx); - ctx->priority = I915_PRIORITY_MIN; /* lowest priority; idle task */ dev_priv->kernel_context = ctx; - GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); + /* highest priority; preempting task */ + ctx = create_kernel_context(dev_priv, INT_MAX); + if (IS_ERR(ctx)) { + DRM_ERROR("Failed to create default preempt context\n"); + err = PTR_ERR(ctx); + goto err_kernel_context; + } + dev_priv->preempt_context = ctx; DRM_DEBUG_DRIVER("%s context support initialized\n", dev_priv->engine[RCS]->context_size ? "logical" : "fake"); return 0; + +err_kernel_context: + destroy_kernel_context(&dev_priv->kernel_context); +err: + return err; } void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) @@ -507,15 +546,10 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) void i915_gem_contexts_fini(struct drm_i915_private *i915) { - struct i915_gem_context *ctx; - lockdep_assert_held(&i915->drm.struct_mutex); - /* Keep the context so that we can free it immediately ourselves */ - ctx = i915_gem_context_get(fetch_and_zero(&i915->kernel_context)); - GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); - context_close(ctx); - i915_gem_context_free(ctx); + destroy_kernel_context(&i915->preempt_context); + destroy_kernel_context(&i915->kernel_context); /* Must free all deferred contexts (via flush_workqueue) first */ ida_destroy(&i915->contexts.hw_ida); @@ -1036,6 +1070,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, case I915_CONTEXT_PARAM_BANNABLE: args->value = i915_gem_context_is_bannable(ctx); break; + case I915_CONTEXT_PARAM_PRIORITY: + args->value = ctx->priority; + break; default: ret = -EINVAL; break; @@ -1091,6 +1128,26 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, else i915_gem_context_clear_bannable(ctx); break; + + case I915_CONTEXT_PARAM_PRIORITY: + { + int priority = args->value; + + if (args->size) + ret = -EINVAL; + else if (!to_i915(dev)->engine[RCS]->schedule) + ret = -ENODEV; + else if (priority > I915_CONTEXT_MAX_USER_PRIORITY || + priority < I915_CONTEXT_MIN_USER_PRIORITY) + ret = -EINVAL; + else if (priority > I915_CONTEXT_DEFAULT_PRIORITY && + !capable(CAP_SYS_NICE)) + ret = -EPERM; + else + ctx->priority = priority; + } + break; + default: ret = -EINVAL; break; diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 6176e589cf09..864439a214c8 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -256,11 +256,21 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev, return drm_gem_dmabuf_export(dev, &exp_info); } -static struct sg_table * -i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) { - return dma_buf_map_attachment(obj->base.import_attach, - DMA_BIDIRECTIONAL); + struct sg_table *pages; + unsigned int sg_page_sizes; + + pages = dma_buf_map_attachment(obj->base.import_attach, + DMA_BIDIRECTIONAL); + if (IS_ERR(pages)) + return PTR_ERR(pages); + + sg_page_sizes = i915_sg_page_sizes(pages->sgl); + + __i915_gem_object_set_pages(obj, pages, sg_page_sizes); + + return 0; } static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 4df039ef2ce3..a5a5b7e6daae 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -82,7 +82,7 @@ mark_free(struct drm_mm_scan *scan, if (i915_vma_is_pinned(vma)) return false; - if (flags & PIN_NONFAULT && !list_empty(&vma->obj->userfault_link)) + if (flags & PIN_NONFAULT && i915_vma_has_userfault(vma)) return false; list_add(&vma->evict_link, unwind); @@ -315,6 +315,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, break; } + if (flags & PIN_NONFAULT && i915_vma_has_userfault(vma)) { + ret = -ENOSPC; + break; + } + /* Overlap of objects in the same batch? */ if (i915_vma_is_pinned(vma)) { ret = -ENOSPC; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d733c4d5a500..3d7190764f10 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -367,12 +367,12 @@ eb_pin_vma(struct i915_execbuffer *eb, return false; if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) { - if (unlikely(i915_vma_get_fence(vma))) { + if (unlikely(i915_vma_pin_fence(vma))) { i915_vma_unpin(vma); return false; } - if (i915_vma_pin_fence(vma)) + if (vma->fence) exec_flags |= __EXEC_OBJECT_HAS_FENCE; } @@ -385,7 +385,7 @@ static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags) GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN)); if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE)) - i915_vma_unpin_fence(vma); + __i915_vma_unpin_fence(vma); __i915_vma_unpin(vma); } @@ -563,13 +563,13 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb, } if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) { - err = i915_vma_get_fence(vma); + err = i915_vma_pin_fence(vma); if (unlikely(err)) { i915_vma_unpin(vma); return err; } - if (i915_vma_pin_fence(vma)) + if (vma->fence) exec_flags |= __EXEC_OBJECT_HAS_FENCE; } @@ -974,7 +974,9 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, return ERR_PTR(err); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | PIN_NONBLOCK); + PIN_MAPPABLE | + PIN_NONBLOCK | + PIN_NONFAULT); if (IS_ERR(vma)) { memset(&cache->node, 0, sizeof(cache->node)); err = drm_mm_insert_node_in_range diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 2783d63bd1ad..012250f25255 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -240,7 +240,8 @@ static int fence_update(struct drm_i915_fence_reg *fence, /* Ensure that all userspace CPU access is completed before * stealing the fence. */ - i915_gem_release_mmap(fence->vma->obj); + GEM_BUG_ON(fence->vma->fence != fence); + i915_vma_revoke_mmap(fence->vma); fence->vma->fence = NULL; fence->vma = NULL; @@ -280,8 +281,7 @@ static int fence_update(struct drm_i915_fence_reg *fence, * * 0 on success, negative error code on failure. */ -int -i915_vma_put_fence(struct i915_vma *vma) +int i915_vma_put_fence(struct i915_vma *vma) { struct drm_i915_fence_reg *fence = vma->fence; @@ -299,6 +299,8 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv) struct drm_i915_fence_reg *fence; list_for_each_entry(fence, &dev_priv->mm.fence_list, link) { + GEM_BUG_ON(fence->vma && fence->vma->fence != fence); + if (fence->pin_count) continue; @@ -313,7 +315,7 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv) } /** - * i915_vma_get_fence - set up fencing for a vma + * i915_vma_pin_fence - set up fencing for a vma * @vma: vma to map through a fence reg * * When mapping objects through the GTT, userspace wants to be able to write @@ -331,10 +333,11 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv) * 0 on success, negative error code on failure. */ int -i915_vma_get_fence(struct i915_vma *vma) +i915_vma_pin_fence(struct i915_vma *vma) { struct drm_i915_fence_reg *fence; struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL; + int err; /* Note that we revoke fences on runtime suspend. Therefore the user * must keep the device awake whilst using the fence. @@ -344,6 +347,8 @@ i915_vma_get_fence(struct i915_vma *vma) /* Just update our place in the LRU if our fence is getting reused. */ if (vma->fence) { fence = vma->fence; + GEM_BUG_ON(fence->vma != vma); + fence->pin_count++; if (!fence->dirty) { list_move_tail(&fence->link, &fence->i915->mm.fence_list); @@ -353,10 +358,25 @@ i915_vma_get_fence(struct i915_vma *vma) fence = fence_find(vma->vm->i915); if (IS_ERR(fence)) return PTR_ERR(fence); + + GEM_BUG_ON(fence->pin_count); + fence->pin_count++; } else return 0; - return fence_update(fence, set); + err = fence_update(fence, set); + if (err) + goto out_unpin; + + GEM_BUG_ON(fence->vma != set); + GEM_BUG_ON(vma->fence != (set ? fence : NULL)); + + if (set) + return 0; + +out_unpin: + fence->pin_count--; + return err; } /** @@ -429,8 +449,10 @@ void i915_gem_revoke_fences(struct drm_i915_private *dev_priv) for (i = 0; i < dev_priv->num_fence_regs; i++) { struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; + GEM_BUG_ON(fence->vma && fence->vma->fence != fence); + if (fence->vma) - i915_gem_release_mmap(fence->vma->obj); + i915_vma_revoke_mmap(fence->vma); } } @@ -450,13 +472,15 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv) struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; struct i915_vma *vma = reg->vma; + GEM_BUG_ON(vma && vma->fence != reg); + /* * Commit delayed tiling changes if we have an object still * attached to the fence, otherwise just clear the fence. */ if (vma && !i915_gem_object_is_tiled(vma->obj)) { GEM_BUG_ON(!reg->dirty); - GEM_BUG_ON(!list_empty(&vma->obj->userfault_link)); + GEM_BUG_ON(i915_vma_has_userfault(vma)); list_move(®->link, &dev_priv->mm.fence_list); vma->fence = NULL; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 4c82ceb8d318..daba55a4fce2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -135,11 +135,12 @@ static inline void i915_ggtt_invalidate(struct drm_i915_private *i915) int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, int enable_ppgtt) { - bool has_aliasing_ppgtt; bool has_full_ppgtt; bool has_full_48bit_ppgtt; - has_aliasing_ppgtt = dev_priv->info.has_aliasing_ppgtt; + if (!dev_priv->info.has_aliasing_ppgtt) + return 0; + has_full_ppgtt = dev_priv->info.has_full_ppgtt; has_full_48bit_ppgtt = dev_priv->info.has_full_48bit_ppgtt; @@ -149,9 +150,6 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, has_full_48bit_ppgtt = intel_vgpu_has_full_48bit_ppgtt(dev_priv); } - if (!has_aliasing_ppgtt) - return 0; - /* * We don't allow disabling PPGTT for gen9+ as it's a requirement for * execlists, the sole mechanism available to submit work. @@ -188,7 +186,7 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, return 2; } - return has_aliasing_ppgtt ? 1 : 0; + return 1; } static int ppgtt_bind_vma(struct i915_vma *vma, @@ -205,8 +203,6 @@ static int ppgtt_bind_vma(struct i915_vma *vma, return ret; } - vma->pages = vma->obj->mm.pages; - /* Currently applicable only to VLV */ pte_flags = 0; if (vma->obj->gt_ro) @@ -222,6 +218,30 @@ static void ppgtt_unbind_vma(struct i915_vma *vma) vma->vm->clear_range(vma->vm, vma->node.start, vma->size); } +static int ppgtt_set_pages(struct i915_vma *vma) +{ + GEM_BUG_ON(vma->pages); + + vma->pages = vma->obj->mm.pages; + + vma->page_sizes = vma->obj->mm.page_sizes; + + return 0; +} + +static void clear_pages(struct i915_vma *vma) +{ + GEM_BUG_ON(!vma->pages); + + if (vma->pages != vma->obj->mm.pages) { + sg_free_table(vma->pages); + kfree(vma->pages); + } + vma->pages = NULL; + + memset(&vma->page_sizes, 0, sizeof(vma->page_sizes)); +} + static gen8_pte_t gen8_pte_encode(dma_addr_t addr, enum i915_cache_level level) { @@ -497,22 +517,63 @@ static void fill_page_dma_32(struct i915_address_space *vm, static int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) { - struct page *page; + struct page *page = NULL; dma_addr_t addr; + int order; - page = alloc_page(gfp | __GFP_ZERO); - if (unlikely(!page)) - return -ENOMEM; + /* + * In order to utilize 64K pages for an object with a size < 2M, we will + * need to support a 64K scratch page, given that every 16th entry for a + * page-table operating in 64K mode must point to a properly aligned 64K + * region, including any PTEs which happen to point to scratch. + * + * This is only relevant for the 48b PPGTT where we support + * huge-gtt-pages, see also i915_vma_insert(). + * + * TODO: we should really consider write-protecting the scratch-page and + * sharing between ppgtt + */ + if (i915_vm_is_48bit(vm) && + HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) { + order = get_order(I915_GTT_PAGE_SIZE_64K); + page = alloc_pages(gfp | __GFP_ZERO | __GFP_NOWARN, order); + if (page) { + addr = dma_map_page(vm->dma, page, 0, + I915_GTT_PAGE_SIZE_64K, + PCI_DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(vm->dma, addr))) { + __free_pages(page, order); + page = NULL; + } - addr = dma_map_page(vm->dma, page, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL); - if (unlikely(dma_mapping_error(vm->dma, addr))) { - __free_page(page); - return -ENOMEM; + if (!IS_ALIGNED(addr, I915_GTT_PAGE_SIZE_64K)) { + dma_unmap_page(vm->dma, addr, + I915_GTT_PAGE_SIZE_64K, + PCI_DMA_BIDIRECTIONAL); + __free_pages(page, order); + page = NULL; + } + } + } + + if (!page) { + order = 0; + page = alloc_page(gfp | __GFP_ZERO); + if (unlikely(!page)) + return -ENOMEM; + + addr = dma_map_page(vm->dma, page, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(vm->dma, addr))) { + __free_page(page); + return -ENOMEM; + } } vm->scratch_page.page = page; vm->scratch_page.daddr = addr; + vm->scratch_page.order = order; + return 0; } @@ -520,8 +581,9 @@ static void cleanup_scratch_page(struct i915_address_space *vm) { struct i915_page_dma *p = &vm->scratch_page; - dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - __free_page(p->page); + dma_unmap_page(vm->dma, p->daddr, BIT(p->order) << PAGE_SHIFT, + PCI_DMA_BIDIRECTIONAL); + __free_pages(p->page, p->order); } static struct i915_page_table *alloc_pt(struct i915_address_space *vm) @@ -989,6 +1051,105 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx, cache_level); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; +} + +static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, + struct i915_page_directory_pointer **pdps, + struct sgt_dma *iter, + enum i915_cache_level cache_level) +{ + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level); + u64 start = vma->node.start; + dma_addr_t rem = iter->sg->length; + + do { + struct gen8_insert_pte idx = gen8_insert_pte(start); + struct i915_page_directory_pointer *pdp = pdps[idx.pml4e]; + struct i915_page_directory *pd = pdp->page_directory[idx.pdpe]; + unsigned int page_size; + bool maybe_64K = false; + gen8_pte_t encode = pte_encode; + gen8_pte_t *vaddr; + u16 index, max; + + if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M && + IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && + rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) { + index = idx.pde; + max = I915_PDES; + page_size = I915_GTT_PAGE_SIZE_2M; + + encode |= GEN8_PDE_PS_2M; + + vaddr = kmap_atomic_px(pd); + } else { + struct i915_page_table *pt = pd->page_table[idx.pde]; + + index = idx.pte; + max = GEN8_PTES; + page_size = I915_GTT_PAGE_SIZE; + + if (!index && + vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K && + IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && + (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || + rem >= (max - index) << PAGE_SHIFT)) + maybe_64K = true; + + vaddr = kmap_atomic_px(pt); + } + + do { + GEM_BUG_ON(iter->sg->length < page_size); + vaddr[index++] = encode | iter->dma; + + start += page_size; + iter->dma += page_size; + rem -= page_size; + if (iter->dma >= iter->max) { + iter->sg = __sg_next(iter->sg); + if (!iter->sg) + break; + + rem = iter->sg->length; + iter->dma = sg_dma_address(iter->sg); + iter->max = iter->dma + rem; + + if (maybe_64K && index < max && + !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && + (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || + rem >= (max - index) << PAGE_SHIFT))) + maybe_64K = false; + + if (unlikely(!IS_ALIGNED(iter->dma, page_size))) + break; + } + } while (rem >= page_size && index < max); + + kunmap_atomic(vaddr); + + /* + * Is it safe to mark the 2M block as 64K? -- Either we have + * filled whole page-table with 64K entries, or filled part of + * it and have reached the end of the sg table and we have + * enough padding. + */ + if (maybe_64K && + (index == max || + (i915_vm_has_scratch_64K(vma->vm) && + !iter->sg && IS_ALIGNED(vma->node.start + + vma->node.size, + I915_GTT_PAGE_SIZE_2M)))) { + vaddr = kmap_atomic_px(pd); + vaddr[idx.pde] |= GEN8_PDE_IPS_64K; + kunmap_atomic(vaddr); + page_size = I915_GTT_PAGE_SIZE_64K; + } + + vma->page_sizes.gtt |= page_size; + } while (iter->sg); } static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, @@ -1003,11 +1164,18 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, .max = iter.dma + iter.sg->length, }; struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps; - struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); - while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], &iter, - &idx, cache_level)) - GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); + if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { + gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level); + } else { + struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); + + while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], + &iter, &idx, cache_level)) + GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + } } static void gen8_free_page_tables(struct i915_address_space *vm, @@ -1452,6 +1620,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.cleanup = gen8_ppgtt_cleanup; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; + ppgtt->base.set_pages = ppgtt_set_pages; + ppgtt->base.clear_pages = clear_pages; ppgtt->debug_dump = gen8_dump_ppgtt; return 0; @@ -1726,6 +1896,8 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, } } while (1); kunmap_atomic(vaddr); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; } static int gen6_alloc_va_range(struct i915_address_space *vm, @@ -1894,6 +2066,8 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; + ppgtt->base.set_pages = ppgtt_set_pages; + ppgtt->base.clear_pages = clear_pages; ppgtt->base.cleanup = gen6_ppgtt_cleanup; ppgtt->debug_dump = gen6_dump_ppgtt; @@ -1961,6 +2135,23 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); else if (IS_GEN9_LP(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); + + /* + * To support 64K PTEs we need to first enable the use of the + * Intermediate-Page-Size(IPS) bit of the PDE field via some magical + * mmio, otherwise the page-walker will simply ignore the IPS bit. This + * shouldn't be needed after GEN10. + * + * 64K pages were first introduced from BDW+, although technically they + * only *work* from gen9+. For pre-BDW we instead have the option for + * 32K pages, but we don't currently have any support for it in our + * driver. + */ + if (HAS_PAGE_SIZES(dev_priv, I915_GTT_PAGE_SIZE_64K) && + INTEL_GEN(dev_priv) <= 10) + I915_WRITE(GEN8_GAMW_ECO_DEV_RW_IA, + I915_READ(GEN8_GAMW_ECO_DEV_RW_IA) | + GAMW_ECO_ENABLE_64K_IPS_FIELD); } int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv) @@ -2405,12 +2596,6 @@ static int ggtt_bind_vma(struct i915_vma *vma, struct drm_i915_gem_object *obj = vma->obj; u32 pte_flags; - if (unlikely(!vma->pages)) { - int ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; - } - /* Currently applicable only to VLV */ pte_flags = 0; if (obj->gt_ro) @@ -2420,6 +2605,8 @@ static int ggtt_bind_vma(struct i915_vma *vma, vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); intel_runtime_pm_put(i915); + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + /* * Without aliasing PPGTT there's no difference between * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally @@ -2447,12 +2634,6 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, u32 pte_flags; int ret; - if (unlikely(!vma->pages)) { - ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; - } - /* Currently applicable only to VLV */ pte_flags = 0; if (vma->obj->gt_ro) @@ -2467,7 +2648,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, vma->node.start, vma->size); if (ret) - goto err_pages; + return ret; } appgtt->base.insert_entries(&appgtt->base, vma, cache_level, @@ -2481,17 +2662,6 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, } return 0; - -err_pages: - if (!(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND))) { - if (vma->pages != vma->obj->mm.pages) { - GEM_BUG_ON(!vma->pages); - sg_free_table(vma->pages); - kfree(vma->pages); - } - vma->pages = NULL; - } - return ret; } static void aliasing_gtt_unbind_vma(struct i915_vma *vma) @@ -2529,6 +2699,21 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL); } +static int ggtt_set_pages(struct i915_vma *vma) +{ + int ret; + + GEM_BUG_ON(vma->pages); + + ret = i915_get_ggtt_vma_pages(vma); + if (ret) + return ret; + + vma->page_sizes = vma->obj->mm.page_sizes; + + return 0; +} + static void i915_gtt_color_adjust(const struct drm_mm_node *node, unsigned long color, u64 *start, @@ -3151,6 +3336,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.cleanup = gen6_gmch_remove; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; + ggtt->base.set_pages = ggtt_set_pages; + ggtt->base.clear_pages = clear_pages; ggtt->base.insert_page = gen8_ggtt_insert_page; ggtt->base.clear_range = nop_clear_range; if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) @@ -3209,6 +3396,8 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.insert_entries = gen6_ggtt_insert_entries; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; + ggtt->base.set_pages = ggtt_set_pages; + ggtt->base.clear_pages = clear_pages; ggtt->base.cleanup = gen6_gmch_remove; ggtt->invalidate = gen6_ggtt_invalidate; @@ -3254,6 +3443,8 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.clear_range = i915_ggtt_clear_range; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; + ggtt->base.set_pages = ggtt_set_pages; + ggtt->base.clear_pages = clear_pages; ggtt->base.cleanup = i915_gmch_remove; ggtt->invalidate = gmch_ggtt_invalidate; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index f62fb903dc24..93211a96fdad 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -42,7 +42,13 @@ #include "i915_gem_request.h" #include "i915_selftest.h" -#define I915_GTT_PAGE_SIZE 4096UL +#define I915_GTT_PAGE_SIZE_4K BIT(12) +#define I915_GTT_PAGE_SIZE_64K BIT(16) +#define I915_GTT_PAGE_SIZE_2M BIT(21) + +#define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K +#define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M + #define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE #define I915_FENCE_REG_NONE -1 @@ -148,6 +154,9 @@ typedef u64 gen8_ppgtt_pml4e_t; #define GEN8_PPAT_GET_AGE(x) ((x) & (3 << 4)) #define CHV_PPAT_GET_SNOOP(x) ((x) & (1 << 6)) +#define GEN8_PDE_IPS_64K BIT(11) +#define GEN8_PDE_PS_2M BIT(7) + struct sg_table; struct intel_rotation_info { @@ -207,6 +216,7 @@ struct i915_vma; struct i915_page_dma { struct page *page; + int order; union { dma_addr_t daddr; @@ -329,6 +339,8 @@ struct i915_address_space { int (*bind_vma)(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); + int (*set_pages)(struct i915_vma *vma); + void (*clear_pages)(struct i915_vma *vma); I915_SELFTEST_DECLARE(struct fault_attr fault_attr); }; @@ -341,6 +353,12 @@ i915_vm_is_48bit(const struct i915_address_space *vm) return (vm->total - 1) >> 32; } +static inline bool +i915_vm_has_scratch_64K(struct i915_address_space *vm) +{ + return vm->scratch_page.order == get_order(I915_GTT_PAGE_SIZE_64K); +} + /* The Graphics Translation Table is the way in which GEN hardware translates a * Graphics Virtual Address into a Physical Address. In addition to the normal * collateral associated with any va->pa translations GEN hardware also has a diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index c1f64ddaf8aa..ee83ec838ee7 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -44,12 +44,12 @@ static void internal_free_pages(struct sg_table *st) kfree(st); } -static struct sg_table * -i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *st; struct scatterlist *sg; + unsigned int sg_page_sizes; unsigned int npages; int max_order; gfp_t gfp; @@ -78,16 +78,17 @@ i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) create_st: st = kmalloc(sizeof(*st), GFP_KERNEL); if (!st) - return ERR_PTR(-ENOMEM); + return -ENOMEM; npages = obj->base.size / PAGE_SIZE; if (sg_alloc_table(st, npages, GFP_KERNEL)) { kfree(st); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } sg = st->sgl; st->nents = 0; + sg_page_sizes = 0; do { int order = min(fls(npages) - 1, max_order); @@ -105,6 +106,7 @@ create_st: } while (1); sg_set_page(sg, page, PAGE_SIZE << order, 0); + sg_page_sizes |= PAGE_SIZE << order; st->nents++; npages -= 1 << order; @@ -132,13 +134,17 @@ create_st: * object are only valid whilst active and pinned. */ obj->mm.madv = I915_MADV_DONTNEED; - return st; + + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; err: sg_set_page(sg, NULL, 0, 0); sg_mark_end(sg); internal_free_pages(st); - return ERR_PTR(-ENOMEM); + + return -ENOMEM; } static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index c30d8f808185..d67f1cbe842d 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -69,7 +69,7 @@ struct drm_i915_gem_object_ops { * being released or under memory pressure (where we attempt to * reap pages for the shrinker). */ - struct sg_table *(*get_pages)(struct drm_i915_gem_object *); + int (*get_pages)(struct drm_i915_gem_object *); void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *); int (*pwrite)(struct drm_i915_gem_object *, @@ -123,6 +123,7 @@ struct drm_i915_gem_object { /** * Whether the object is currently in the GGTT mmap. */ + unsigned int userfault_count; struct list_head userfault_link; struct list_head batch_pool_link; @@ -169,6 +170,35 @@ struct drm_i915_gem_object { struct sg_table *pages; void *mapping; + /* TODO: whack some of this into the error state */ + struct i915_page_sizes { + /** + * The sg mask of the pages sg_table. i.e the mask of + * of the lengths for each sg entry. + */ + unsigned int phys; + + /** + * The gtt page sizes we are allowed to use given the + * sg mask and the supported page sizes. This will + * express the smallest unit we can use for the whole + * object, as well as the larger sizes we may be able + * to use opportunistically. + */ + unsigned int sg; + + /** + * The actual gtt page size usage. Since we can have + * multiple vma associated with this object we need to + * prevent any trampling of state, hence a copy of this + * struct also lives in each vma, therefore the gtt + * value here should only be read/write through the vma. + */ + unsigned int gtt; + } page_sizes; + + I915_SELFTEST_DECLARE(unsigned int page_mask); + struct i915_gem_object_page_iter { struct scatterlist *sg_pos; unsigned int sg_idx; /* in pages, but 32bit eek! */ diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 4eb1a76731b2..d140fcf5c6a3 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -186,7 +186,7 @@ i915_priotree_init(struct i915_priotree *pt) INIT_LIST_HEAD(&pt->signalers_list); INIT_LIST_HEAD(&pt->waiters_list); INIT_LIST_HEAD(&pt->link); - pt->priority = INT_MIN; + pt->priority = I915_PRIORITY_INVALID; } static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) @@ -416,7 +416,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) spin_lock_irq(&request->lock); if (request->waitboost) - atomic_dec(&request->i915->rps.num_waiters); + atomic_dec(&request->i915->gt_pm.rps.num_waiters); dma_fence_signal_locked(&request->fence); spin_unlock_irq(&request->lock); @@ -556,7 +556,16 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) switch (state) { case FENCE_COMPLETE: trace_i915_gem_request_submit(request); + /* + * We need to serialize use of the submit_request() callback with its + * hotplugging performed during an emergency i915_gem_set_wedged(). + * We use the RCU mechanism to mark the critical section in order to + * force i915_gem_set_wedged() to wait until the submit_request() is + * completed before proceeding. + */ + rcu_read_lock(); request->engine->submit_request(request); + rcu_read_unlock(); break; case FENCE_FREE: @@ -587,6 +596,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, lockdep_assert_held(&dev_priv->drm.struct_mutex); + /* + * Preempt contexts are reserved for exclusive use to inject a + * preemption context switch. They are never to be used for any trivial + * request! + */ + GEM_BUG_ON(ctx == dev_priv->preempt_context); + /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report * EIO if the GPU is already wedged. */ diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 96eb52471dad..26249f39de67 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -30,6 +30,8 @@ #include "i915_gem.h" #include "i915_sw_fence.h" +#include + struct drm_file; struct drm_i915_gem_object; struct drm_i915_gem_request; @@ -69,9 +71,14 @@ struct i915_priotree { struct list_head waiters_list; /* those after us, they depend upon us */ struct list_head link; int priority; -#define I915_PRIORITY_MAX 1024 -#define I915_PRIORITY_NORMAL 0 -#define I915_PRIORITY_MIN (-I915_PRIORITY_MAX) +}; + +enum { + I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, + I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, + I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, + + I915_PRIORITY_INVALID = INT_MIN }; struct i915_gem_capture_list { diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 507c9f0d8df1..54fd4cfa9d07 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -539,12 +539,18 @@ i915_pages_create_for_stolen(struct drm_device *dev, return st; } -static struct sg_table * -i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) { - return i915_pages_create_for_stolen(obj->base.dev, - obj->stolen->start, - obj->stolen->size); + struct sg_table *pages = + i915_pages_create_for_stolen(obj->base.dev, + obj->stolen->start, + obj->stolen->size); + if (IS_ERR(pages)) + return PTR_ERR(pages); + + __i915_gem_object_set_pages(obj, pages, obj->stolen->size); + + return 0; } static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 2d4996de7331..4d712a4db63b 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -164,7 +164,6 @@ static struct i915_mmu_notifier * i915_mmu_notifier_create(struct mm_struct *mm) { struct i915_mmu_notifier *mn; - int ret; mn = kmalloc(sizeof(*mn), GFP_KERNEL); if (mn == NULL) @@ -179,14 +178,6 @@ i915_mmu_notifier_create(struct mm_struct *mm) return ERR_PTR(-ENOMEM); } - /* Protected by mmap_sem (write-lock) */ - ret = __mmu_notifier_register(&mn->mn, mm); - if (ret) { - destroy_workqueue(mn->wq); - kfree(mn); - return ERR_PTR(ret); - } - return mn; } @@ -210,23 +201,40 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) static struct i915_mmu_notifier * i915_mmu_notifier_find(struct i915_mm_struct *mm) { - struct i915_mmu_notifier *mn = mm->mn; + struct i915_mmu_notifier *mn; + int err = 0; mn = mm->mn; if (mn) return mn; + mn = i915_mmu_notifier_create(mm->mm); + if (IS_ERR(mn)) + err = PTR_ERR(mn); + down_write(&mm->mm->mmap_sem); mutex_lock(&mm->i915->mm_lock); - if ((mn = mm->mn) == NULL) { - mn = i915_mmu_notifier_create(mm->mm); - if (!IS_ERR(mn)) - mm->mn = mn; + if (mm->mn == NULL && !err) { + /* Protected by mmap_sem (write-lock) */ + err = __mmu_notifier_register(&mn->mn, mm->mm); + if (!err) { + /* Protected by mm_lock */ + mm->mn = fetch_and_zero(&mn); + } + } else { + /* someone else raced and successfully installed the mmu + * notifier, we can cancel our own errors */ + err = 0; } mutex_unlock(&mm->i915->mm_lock); up_write(&mm->mm->mmap_sem); - return mn; + if (mn) { + destroy_workqueue(mn->wq); + kfree(mn); + } + + return err ? ERR_PTR(err) : mm->mn; } static int @@ -405,6 +413,7 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, { unsigned int max_segment = i915_sg_segment_size(); struct sg_table *st; + unsigned int sg_page_sizes; int ret; st = kmalloc(sizeof(*st), GFP_KERNEL); @@ -434,6 +443,10 @@ alloc_table: return ERR_PTR(ret); } + sg_page_sizes = i915_sg_page_sizes(st->sgl); + + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + return st; } @@ -521,7 +534,6 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) pages = __i915_gem_userptr_alloc_pages(obj, pvec, npages); if (!IS_ERR(pages)) { - __i915_gem_object_set_pages(obj, pages); pinned = 0; pages = NULL; } @@ -582,8 +594,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj) return ERR_PTR(-EAGAIN); } -static struct sg_table * -i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) +static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) { const int num_pages = obj->base.size >> PAGE_SHIFT; struct mm_struct *mm = obj->userptr.mm->mm; @@ -612,9 +623,9 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) if (obj->userptr.work) { /* active flag should still be held for the pending work */ if (IS_ERR(obj->userptr.work)) - return ERR_CAST(obj->userptr.work); + return PTR_ERR(obj->userptr.work); else - return ERR_PTR(-EAGAIN); + return -EAGAIN; } pvec = NULL; @@ -650,7 +661,7 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) release_pages(pvec, pinned, 0); kvfree(pvec); - return pages; + return PTR_ERR_OR_ZERO(pages); } static void diff --git a/drivers/gpu/drm/i915/i915_gemfs.c b/drivers/gpu/drm/i915/i915_gemfs.c new file mode 100644 index 000000000000..e2993857df37 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gemfs.c @@ -0,0 +1,74 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include +#include +#include + +#include "i915_drv.h" +#include "i915_gemfs.h" + +int i915_gemfs_init(struct drm_i915_private *i915) +{ + struct file_system_type *type; + struct vfsmount *gemfs; + + type = get_fs_type("tmpfs"); + if (!type) + return -ENODEV; + + gemfs = kern_mount(type); + if (IS_ERR(gemfs)) + return PTR_ERR(gemfs); + + /* + * Enable huge-pages for objects that are at least HPAGE_PMD_SIZE, most + * likely 2M. Note that within_size may overallocate huge-pages, if say + * we allocate an object of size 2M + 4K, we may get 2M + 2M, but under + * memory pressure shmem should split any huge-pages which can be + * shrunk. + */ + + if (has_transparent_hugepage()) { + struct super_block *sb = gemfs->mnt_sb; + char options[] = "huge=within_size"; + int flags = 0; + int err; + + err = sb->s_op->remount_fs(sb, &flags, options); + if (err) { + kern_unmount(gemfs); + return err; + } + } + + i915->mm.gemfs = gemfs; + + return 0; +} + +void i915_gemfs_fini(struct drm_i915_private *i915) +{ + kern_unmount(i915->mm.gemfs); +} diff --git a/drivers/gpu/drm/i915/i915_gemfs.h b/drivers/gpu/drm/i915/i915_gemfs.h new file mode 100644 index 000000000000..cca8bdc5b93e --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gemfs.h @@ -0,0 +1,34 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_GEMFS_H__ +#define __I915_GEMFS_H__ + +struct drm_i915_private; + +int i915_gemfs_init(struct drm_i915_private *i915); + +void i915_gemfs_fini(struct drm_i915_private *i915); + +#endif diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index c14552ab270b..653fb69e7ecb 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -377,9 +377,9 @@ static void error_print_request(struct drm_i915_error_state_buf *m, if (!erq->seqno) return; - err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, emitted %dms ago, head %08x, tail %08x\n", + err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms ago, head %08x, tail %08x\n", prefix, erq->pid, erq->ban_score, - erq->context, erq->seqno, + erq->context, erq->seqno, erq->priority, jiffies_to_msecs(jiffies - erq->jiffies), erq->head, erq->tail); } @@ -388,9 +388,9 @@ static void error_print_context(struct drm_i915_error_state_buf *m, const char *header, const struct drm_i915_error_context *ctx) { - err_printf(m, "%s%s[%d] user_handle %d hw_id %d, ban score %d guilty %d active %d\n", + err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d guilty %d active %d\n", header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, - ctx->ban_score, ctx->guilty, ctx->active); + ctx->priority, ctx->ban_score, ctx->guilty, ctx->active); } static void error_print_engine(struct drm_i915_error_state_buf *m, @@ -1271,6 +1271,7 @@ static void record_request(struct drm_i915_gem_request *request, struct drm_i915_error_request *erq) { erq->context = request->ctx->hw_id; + erq->priority = request->priotree.priority; erq->ban_score = atomic_read(&request->ctx->ban_score); erq->seqno = request->global_seqno; erq->jiffies = request->emitted_jiffies; @@ -1364,6 +1365,7 @@ static void record_context(struct drm_i915_error_context *e, e->handle = ctx->user_handle; e->hw_id = ctx->hw_id; + e->priority = ctx->priority; e->ban_score = atomic_read(&ctx->ban_score); e->guilty = atomic_read(&ctx->guilty_count); e->active = atomic_read(&ctx->active_count); @@ -1672,8 +1674,8 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv, struct i915_gpu_state *error) { error->awake = dev_priv->gt.awake; - error->wakelock = atomic_read(&dev_priv->pm.wakeref_count); - error->suspended = dev_priv->pm.suspended; + error->wakelock = atomic_read(&dev_priv->runtime_pm.wakeref_count); + error->suspended = dev_priv->runtime_pm.suspended; error->iommu = -1; #ifdef CONFIG_INTEL_IOMMU diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 04f1281d81a5..a2e8114b739d 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -21,12 +21,13 @@ * IN THE SOFTWARE. * */ -#include -#include "i915_drv.h" -#include "intel_uc.h" +#include #include +#include "i915_guc_submission.h" +#include "i915_drv.h" + /** * DOC: GuC-based command submission * @@ -337,7 +338,7 @@ static void guc_stage_desc_init(struct intel_guc *guc, for_each_engine_masked(engine, dev_priv, client->engines, tmp) { struct intel_context *ce = &ctx->engine[engine->id]; - uint32_t guc_engine_id = engine->guc_id; + u32 guc_engine_id = engine->guc_id; struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id]; /* TODO: We have a design issue to be solved here. Only when we @@ -387,13 +388,13 @@ static void guc_stage_desc_init(struct intel_guc *guc, gfx_addr = guc_ggtt_offset(client->vma); desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + client->doorbell_offset; - desc->db_trigger_cpu = (uintptr_t)__get_doorbell(client); + desc->db_trigger_cpu = ptr_to_u64(__get_doorbell(client)); desc->db_trigger_uk = gfx_addr + client->doorbell_offset; desc->process_desc = gfx_addr + client->proc_desc_offset; desc->wq_addr = gfx_addr + GUC_DB_SIZE; desc->wq_size = GUC_WQ_SIZE; - desc->desc_private = (uintptr_t)client; + desc->desc_private = ptr_to_u64(client); } static void guc_stage_desc_fini(struct intel_guc *guc, @@ -499,7 +500,7 @@ static void i915_guc_submit(struct intel_engine_cs *engine) const unsigned int engine_id = engine->id; unsigned int n; - for (n = 0; n < ARRAY_SIZE(execlists->port); n++) { + for (n = 0; n < execlists_num_ports(execlists); n++) { struct drm_i915_gem_request *rq; unsigned int count; @@ -643,48 +644,6 @@ static void i915_guc_irq_handler(unsigned long data) * path of i915_guc_submit() above. */ -/** - * intel_guc_allocate_vma() - Allocate a GGTT VMA for GuC usage - * @guc: the guc - * @size: size of area to allocate (both virtual space and memory) - * - * This is a wrapper to create an object for use with the GuC. In order to - * use it inside the GuC, an object needs to be pinned lifetime, so we allocate - * both some backing storage and a range inside the Global GTT. We must pin - * it in the GGTT somewhere other than than [0, GUC_WOPCM_TOP) because that - * range is reserved inside GuC. - * - * Return: A i915_vma if successful, otherwise an ERR_PTR. - */ -struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - int ret; - - obj = i915_gem_object_create(dev_priv, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); - if (IS_ERR(vma)) - goto err; - - ret = i915_vma_pin(vma, 0, PAGE_SIZE, - PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP); - if (ret) { - vma = ERR_PTR(ret); - goto err; - } - - return vma; - -err: - i915_gem_object_put(obj); - return vma; -} - /* Check that a doorbell register is in the expected state */ static bool doorbell_ok(struct intel_guc *guc, u16 db_id) { @@ -796,8 +755,8 @@ static int guc_init_doorbell_hw(struct intel_guc *guc) */ static struct i915_guc_client * guc_client_alloc(struct drm_i915_private *dev_priv, - uint32_t engines, - uint32_t priority, + u32 engines, + u32 priority, struct i915_gem_context *ctx) { struct i915_guc_client *client; @@ -1069,6 +1028,7 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv) static void guc_interrupts_capture(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; struct intel_engine_cs *engine; enum intel_engine_id id; int irqs; @@ -1105,12 +1065,13 @@ static void guc_interrupts_capture(struct drm_i915_private *dev_priv) * Here we CLEAR REDIRECT_TO_GUC bit in pm_intrmsk_mbz, which will * result in the register bit being left SET! */ - dev_priv->rps.pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; - dev_priv->rps.pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; + rps->pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; } static void guc_interrupts_release(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; struct intel_engine_cs *engine; enum intel_engine_id id; int irqs; @@ -1129,8 +1090,8 @@ static void guc_interrupts_release(struct drm_i915_private *dev_priv) I915_WRITE(GUC_VCS2_VCS1_IER, 0); I915_WRITE(GUC_WD_VECS_IER, 0); - dev_priv->rps.pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; - dev_priv->rps.pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; + rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + rps->pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; } int i915_guc_submission_enable(struct drm_i915_private *dev_priv) @@ -1212,55 +1173,3 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv) guc_client_free(guc->execbuf_client); guc->execbuf_client = NULL; } - -/** - * intel_guc_suspend() - notify GuC entering suspend state - * @dev_priv: i915 device private - */ -int intel_guc_suspend(struct drm_i915_private *dev_priv) -{ - struct intel_guc *guc = &dev_priv->guc; - struct i915_gem_context *ctx; - u32 data[3]; - - if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) - return 0; - - gen9_disable_guc_interrupts(dev_priv); - - ctx = dev_priv->kernel_context; - - data[0] = INTEL_GUC_ACTION_ENTER_S_STATE; - /* any value greater than GUC_POWER_D0 */ - data[1] = GUC_POWER_D1; - /* first page is shared data with GuC */ - data[2] = guc_ggtt_offset(ctx->engine[RCS].state) + LRC_GUCSHR_PN * PAGE_SIZE; - - return intel_guc_send(guc, data, ARRAY_SIZE(data)); -} - -/** - * intel_guc_resume() - notify GuC resuming from suspend state - * @dev_priv: i915 device private - */ -int intel_guc_resume(struct drm_i915_private *dev_priv) -{ - struct intel_guc *guc = &dev_priv->guc; - struct i915_gem_context *ctx; - u32 data[3]; - - if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) - return 0; - - if (i915_modparams.guc_log_level >= 0) - gen9_enable_guc_interrupts(dev_priv); - - ctx = dev_priv->kernel_context; - - data[0] = INTEL_GUC_ACTION_EXIT_S_STATE; - data[1] = GUC_POWER_D0; - /* first page is shared data with GuC */ - data[2] = guc_ggtt_offset(ctx->engine[RCS].state) + LRC_GUCSHR_PN * PAGE_SIZE; - - return intel_guc_send(guc, data, ARRAY_SIZE(data)); -} diff --git a/drivers/gpu/drm/i915/i915_guc_submission.h b/drivers/gpu/drm/i915/i915_guc_submission.h new file mode 100644 index 000000000000..cb4353b59059 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_guc_submission.h @@ -0,0 +1,80 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _I915_GUC_SUBMISSION_H_ +#define _I915_GUC_SUBMISSION_H_ + +#include + +#include "i915_gem.h" + +struct drm_i915_private; + +/* + * This structure primarily describes the GEM object shared with the GuC. + * The specs sometimes refer to this object as a "GuC context", but we use + * the term "client" to avoid confusion with hardware contexts. This + * GEM object is held for the entire lifetime of our interaction with + * the GuC, being allocated before the GuC is loaded with its firmware. + * Because there's no way to update the address used by the GuC after + * initialisation, the shared object must stay pinned into the GGTT as + * long as the GuC is in use. We also keep the first page (only) mapped + * into kernel address space, as it includes shared data that must be + * updated on every request submission. + * + * The single GEM object described here is actually made up of several + * separate areas, as far as the GuC is concerned. The first page (kept + * kmap'd) includes the "process descriptor" which holds sequence data for + * the doorbell, and one cacheline which actually *is* the doorbell; a + * write to this will "ring the doorbell" (i.e. send an interrupt to the + * GuC). The subsequent pages of the client object constitute the work + * queue (a circular array of work items), again described in the process + * descriptor. Work queue pages are mapped momentarily as required. + */ +struct i915_guc_client { + struct i915_vma *vma; + void *vaddr; + struct i915_gem_context *owner; + struct intel_guc *guc; + + /* bitmap of (host) engine ids */ + u32 engines; + u32 priority; + u32 stage_id; + u32 proc_desc_offset; + + u16 doorbell_id; + unsigned long doorbell_offset; + + spinlock_t wq_lock; + /* Per-engine counts of GuC submissions */ + u64 submissions[I915_NUM_ENGINES]; +}; + +int i915_guc_submission_init(struct drm_i915_private *dev_priv); +int i915_guc_submission_enable(struct drm_i915_private *dev_priv); +void i915_guc_submission_disable(struct drm_i915_private *dev_priv); +void i915_guc_submission_fini(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index efd7827ff181..b1296a55c1e4 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -404,19 +404,21 @@ void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv) { spin_lock_irq(&dev_priv->irq_lock); gen6_reset_pm_iir(dev_priv, dev_priv->pm_rps_events); - dev_priv->rps.pm_iir = 0; + dev_priv->gt_pm.rps.pm_iir = 0; spin_unlock_irq(&dev_priv->irq_lock); } void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) { - if (READ_ONCE(dev_priv->rps.interrupts_enabled)) + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + if (READ_ONCE(rps->interrupts_enabled)) return; spin_lock_irq(&dev_priv->irq_lock); - WARN_ON_ONCE(dev_priv->rps.pm_iir); + WARN_ON_ONCE(rps->pm_iir); WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); - dev_priv->rps.interrupts_enabled = true; + rps->interrupts_enabled = true; gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); spin_unlock_irq(&dev_priv->irq_lock); @@ -424,11 +426,13 @@ void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) { - if (!READ_ONCE(dev_priv->rps.interrupts_enabled)) + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + if (!READ_ONCE(rps->interrupts_enabled)) return; spin_lock_irq(&dev_priv->irq_lock); - dev_priv->rps.interrupts_enabled = false; + rps->interrupts_enabled = false; I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u)); @@ -442,7 +446,7 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) * we will reset the GPU to minimum frequencies, so the current * state of the worker can be discarded. */ - cancel_work_sync(&dev_priv->rps.work); + cancel_work_sync(&rps->work); gen6_reset_rps_interrupts(dev_priv); } @@ -1119,12 +1123,13 @@ static void vlv_c0_read(struct drm_i915_private *dev_priv, void gen6_rps_reset_ei(struct drm_i915_private *dev_priv) { - memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei)); + memset(&dev_priv->gt_pm.rps.ei, 0, sizeof(dev_priv->gt_pm.rps.ei)); } static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) { - const struct intel_rps_ei *prev = &dev_priv->rps.ei; + struct intel_rps *rps = &dev_priv->gt_pm.rps; + const struct intel_rps_ei *prev = &rps->ei; struct intel_rps_ei now; u32 events = 0; @@ -1151,28 +1156,29 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) c0 = max(render, media); c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ - if (c0 > time * dev_priv->rps.up_threshold) + if (c0 > time * rps->up_threshold) events = GEN6_PM_RP_UP_THRESHOLD; - else if (c0 < time * dev_priv->rps.down_threshold) + else if (c0 < time * rps->down_threshold) events = GEN6_PM_RP_DOWN_THRESHOLD; } - dev_priv->rps.ei = now; + rps->ei = now; return events; } static void gen6_pm_rps_work(struct work_struct *work) { struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, rps.work); + container_of(work, struct drm_i915_private, gt_pm.rps.work); + struct intel_rps *rps = &dev_priv->gt_pm.rps; bool client_boost = false; int new_delay, adj, min, max; u32 pm_iir = 0; spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->rps.interrupts_enabled) { - pm_iir = fetch_and_zero(&dev_priv->rps.pm_iir); - client_boost = atomic_read(&dev_priv->rps.num_waiters); + if (rps->interrupts_enabled) { + pm_iir = fetch_and_zero(&rps->pm_iir); + client_boost = atomic_read(&rps->num_waiters); } spin_unlock_irq(&dev_priv->irq_lock); @@ -1181,18 +1187,18 @@ static void gen6_pm_rps_work(struct work_struct *work) if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost) goto out; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir); - adj = dev_priv->rps.last_adj; - new_delay = dev_priv->rps.cur_freq; - min = dev_priv->rps.min_freq_softlimit; - max = dev_priv->rps.max_freq_softlimit; + adj = rps->last_adj; + new_delay = rps->cur_freq; + min = rps->min_freq_softlimit; + max = rps->max_freq_softlimit; if (client_boost) - max = dev_priv->rps.max_freq; - if (client_boost && new_delay < dev_priv->rps.boost_freq) { - new_delay = dev_priv->rps.boost_freq; + max = rps->max_freq; + if (client_boost && new_delay < rps->boost_freq) { + new_delay = rps->boost_freq; adj = 0; } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { if (adj > 0) @@ -1200,15 +1206,15 @@ static void gen6_pm_rps_work(struct work_struct *work) else /* CHV needs even encode values */ adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1; - if (new_delay >= dev_priv->rps.max_freq_softlimit) + if (new_delay >= rps->max_freq_softlimit) adj = 0; } else if (client_boost) { adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { - if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq) - new_delay = dev_priv->rps.efficient_freq; - else if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit) - new_delay = dev_priv->rps.min_freq_softlimit; + if (rps->cur_freq > rps->efficient_freq) + new_delay = rps->efficient_freq; + else if (rps->cur_freq > rps->min_freq_softlimit) + new_delay = rps->min_freq_softlimit; adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { if (adj < 0) @@ -1216,13 +1222,13 @@ static void gen6_pm_rps_work(struct work_struct *work) else /* CHV needs even encode values */ adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1; - if (new_delay <= dev_priv->rps.min_freq_softlimit) + if (new_delay <= rps->min_freq_softlimit) adj = 0; } else { /* unknown event */ adj = 0; } - dev_priv->rps.last_adj = adj; + rps->last_adj = adj; /* sysfs frequency interfaces may have snuck in while servicing the * interrupt @@ -1232,15 +1238,15 @@ static void gen6_pm_rps_work(struct work_struct *work) if (intel_set_rps(dev_priv, new_delay)) { DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); - dev_priv->rps.last_adj = 0; + rps->last_adj = 0; } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); out: /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */ spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->rps.interrupts_enabled) + if (rps->interrupts_enabled) gen6_unmask_pm_irq(dev_priv, dev_priv->pm_rps_events); spin_unlock_irq(&dev_priv->irq_lock); } @@ -1382,10 +1388,8 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) bool tasklet = false; if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { - if (port_count(&execlists->port[0])) { - __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - tasklet = true; - } + __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + tasklet = true; } if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) { @@ -1723,12 +1727,14 @@ static void i9xx_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, * the work queue. */ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + if (pm_iir & dev_priv->pm_rps_events) { spin_lock(&dev_priv->irq_lock); gen6_mask_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events); - if (dev_priv->rps.interrupts_enabled) { - dev_priv->rps.pm_iir |= pm_iir & dev_priv->pm_rps_events; - schedule_work(&dev_priv->rps.work); + if (rps->interrupts_enabled) { + rps->pm_iir |= pm_iir & dev_priv->pm_rps_events; + schedule_work(&rps->work); } spin_unlock(&dev_priv->irq_lock); } @@ -2254,18 +2260,14 @@ static void ivb_err_int_handler(struct drm_i915_private *dev_priv) static void cpt_serr_int_handler(struct drm_i915_private *dev_priv) { u32 serr_int = I915_READ(SERR_INT); + enum pipe pipe; if (serr_int & SERR_INT_POISON) DRM_ERROR("PCH poison interrupt\n"); - if (serr_int & SERR_INT_TRANS_A_FIFO_UNDERRUN) - intel_pch_fifo_underrun_irq_handler(dev_priv, PIPE_A); - - if (serr_int & SERR_INT_TRANS_B_FIFO_UNDERRUN) - intel_pch_fifo_underrun_irq_handler(dev_priv, PIPE_B); - - if (serr_int & SERR_INT_TRANS_C_FIFO_UNDERRUN) - intel_pch_fifo_underrun_irq_handler(dev_priv, PIPE_C); + for_each_pipe(dev_priv, pipe) + if (serr_int & SERR_INT_TRANS_FIFO_UNDERRUN(pipe)) + intel_pch_fifo_underrun_irq_handler(dev_priv, pipe); I915_WRITE(SERR_INT, serr_int); } @@ -3163,10 +3165,17 @@ void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv, enum pipe pipe; spin_lock_irq(&dev_priv->irq_lock); + + if (!intel_irqs_enabled(dev_priv)) { + spin_unlock_irq(&dev_priv->irq_lock); + return; + } + for_each_pipe_masked(dev_priv, pipe, pipe_mask) GEN8_IRQ_INIT_NDX(DE_PIPE, pipe, dev_priv->de_irq_mask[pipe], ~dev_priv->de_irq_mask[pipe] | extra_ier); + spin_unlock_irq(&dev_priv->irq_lock); } @@ -3176,8 +3185,15 @@ void gen8_irq_power_well_pre_disable(struct drm_i915_private *dev_priv, enum pipe pipe; spin_lock_irq(&dev_priv->irq_lock); + + if (!intel_irqs_enabled(dev_priv)) { + spin_unlock_irq(&dev_priv->irq_lock); + return; + } + for_each_pipe_masked(dev_priv, pipe, pipe_mask) GEN8_IRQ_RESET_NDX(DE_PIPE, pipe); + spin_unlock_irq(&dev_priv->irq_lock); /* make sure we're done processing display irqs */ @@ -3598,16 +3614,15 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) else if (IS_BROADWELL(dev_priv)) de_port_enables |= GEN8_PORT_DP_A_HOTPLUG; - dev_priv->de_irq_mask[PIPE_A] = ~de_pipe_masked; - dev_priv->de_irq_mask[PIPE_B] = ~de_pipe_masked; - dev_priv->de_irq_mask[PIPE_C] = ~de_pipe_masked; + for_each_pipe(dev_priv, pipe) { + dev_priv->de_irq_mask[pipe] = ~de_pipe_masked; - for_each_pipe(dev_priv, pipe) if (intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe))) GEN8_IRQ_INIT_NDX(DE_PIPE, pipe, dev_priv->de_irq_mask[pipe], de_pipe_enables); + } GEN3_IRQ_INIT(GEN8_DE_PORT_, ~de_port_masked, de_port_enables); GEN3_IRQ_INIT(GEN8_DE_MISC_, ~de_misc_masked, de_misc_masked); @@ -4000,11 +4015,12 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) void intel_irq_init(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; + struct intel_rps *rps = &dev_priv->gt_pm.rps; int i; intel_hpd_init_work(dev_priv); - INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work); + INIT_WORK(&rps->work, gen6_pm_rps_work); INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); for (i = 0; i < MAX_L3_SLICES; ++i) @@ -4020,7 +4036,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv) else dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS; - dev_priv->rps.pm_intrmsk_mbz = 0; + rps->pm_intrmsk_mbz = 0; /* * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer @@ -4029,10 +4045,10 @@ void intel_irq_init(struct drm_i915_private *dev_priv) * TODO: verify if this can be reproduced on VLV,CHV. */ if (INTEL_GEN(dev_priv) <= 7) - dev_priv->rps.pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; + rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; if (INTEL_GEN(dev_priv) >= 8) - dev_priv->rps.pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; if (IS_GEN2(dev_priv)) { /* Gen2 doesn't have a hardware frame counter */ @@ -4166,7 +4182,7 @@ int intel_irq_install(struct drm_i915_private *dev_priv) * interrupts as enabled _before_ actually enabling them to avoid * special cases in our ordering checks. */ - dev_priv->pm.irqs_enabled = true; + dev_priv->runtime_pm.irqs_enabled = true; return drm_irq_install(&dev_priv->drm, dev_priv->drm.pdev->irq); } @@ -4182,7 +4198,7 @@ void intel_irq_uninstall(struct drm_i915_private *dev_priv) { drm_irq_uninstall(&dev_priv->drm); intel_hpd_cancel_work(dev_priv); - dev_priv->pm.irqs_enabled = false; + dev_priv->runtime_pm.irqs_enabled = false; } /** @@ -4195,7 +4211,7 @@ void intel_irq_uninstall(struct drm_i915_private *dev_priv) void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv) { dev_priv->drm.driver->irq_uninstall(&dev_priv->drm); - dev_priv->pm.irqs_enabled = false; + dev_priv->runtime_pm.irqs_enabled = false; synchronize_irq(dev_priv->drm.irq); } @@ -4208,7 +4224,7 @@ void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv) */ void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv) { - dev_priv->pm.irqs_enabled = true; + dev_priv->runtime_pm.irqs_enabled = true; dev_priv->drm.driver->irq_preinstall(&dev_priv->drm); dev_priv->drm.driver->irq_postinstall(&dev_priv->drm); } diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 9dff323a83d3..b4faeb6aa2bd 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -146,9 +146,6 @@ i915_param_named(disable_display, bool, 0400, i915_param_named_unsafe(enable_cmd_parser, bool, 0400, "Enable command parsing (true=enabled [default], false=disabled)"); -i915_param_named_unsafe(use_mmio_flip, int, 0600, - "use MMIO flips (-1=never, 0=driver discretion [default], 1=always)"); - i915_param_named(mmio_debug, int, 0600, "Enable the MMIO debug code for the first N failures (default: off). " "This may negatively affect performance."); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 4f3f8d650194..c7292268ed43 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -49,7 +49,6 @@ param(int, guc_log_level, -1) \ param(char *, guc_firmware_path, NULL) \ param(char *, huc_firmware_path, NULL) \ - param(int, use_mmio_flip, 0) \ param(int, mmio_debug, 0) \ param(int, edp_vswing, 0) \ param(int, reset, 2) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index da60866b6628..bf467f30c99b 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -54,8 +54,14 @@ .color = { .degamma_lut_size = 512, .gamma_lut_size = 512 } #define CHV_COLORS \ .color = { .degamma_lut_size = 65, .gamma_lut_size = 257 } +#define GLK_COLORS \ + .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } /* Keep in gen based order, and chronological order within a gen */ + +#define GEN_DEFAULT_PAGE_SIZES \ + .page_sizes = I915_GTT_PAGE_SIZE_4K + #define GEN2_FEATURES \ .gen = 2, .num_pipes = 1, \ .has_overlay = 1, .overlay_needs_physical = 1, \ @@ -65,6 +71,7 @@ .ring_mask = RENDER_RING, \ .has_snoop = true, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ CURSOR_OFFSETS static const struct intel_device_info intel_i830_info __initconst = { @@ -98,6 +105,7 @@ static const struct intel_device_info intel_i865g_info __initconst = { .ring_mask = RENDER_RING, \ .has_snoop = true, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ CURSOR_OFFSETS static const struct intel_device_info intel_i915g_info __initconst = { @@ -161,6 +169,7 @@ static const struct intel_device_info intel_pineview_info __initconst = { .ring_mask = RENDER_RING, \ .has_snoop = true, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ CURSOR_OFFSETS static const struct intel_device_info intel_i965g_info __initconst = { @@ -203,6 +212,7 @@ static const struct intel_device_info intel_gm45_info __initconst = { .ring_mask = RENDER_RING | BSD_RING, \ .has_snoop = true, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ CURSOR_OFFSETS static const struct intel_device_info intel_ironlake_d_info __initconst = { @@ -226,6 +236,7 @@ static const struct intel_device_info intel_ironlake_m_info __initconst = { .has_rc6p = 1, \ .has_aliasing_ppgtt = 1, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ CURSOR_OFFSETS #define SNB_D_PLATFORM \ @@ -269,6 +280,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info __initconst = .has_aliasing_ppgtt = 1, \ .has_full_ppgtt = 1, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ IVB_CURSOR_OFFSETS #define IVB_D_PLATFORM \ @@ -325,11 +337,12 @@ static const struct intel_device_info intel_valleyview_info __initconst = { .has_snoop = true, .ring_mask = RENDER_RING | BSD_RING | BLT_RING, .display_mmio_offset = VLV_DISPLAY_BASE, + GEN_DEFAULT_PAGE_SIZES, GEN_DEFAULT_PIPEOFFSETS, CURSOR_OFFSETS }; -#define HSW_FEATURES \ +#define G75_FEATURES \ GEN7_FEATURES, \ .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, \ .has_ddi = 1, \ @@ -341,7 +354,7 @@ static const struct intel_device_info intel_valleyview_info __initconst = { .has_runtime_pm = 1 #define HSW_PLATFORM \ - HSW_FEATURES, \ + G75_FEATURES, \ .platform = INTEL_HASWELL, \ .has_l3_dpf = 1 @@ -360,16 +373,18 @@ static const struct intel_device_info intel_haswell_gt3_info __initconst = { .gt = 3, }; -#define BDW_FEATURES \ - HSW_FEATURES, \ +#define GEN8_FEATURES \ + G75_FEATURES, \ BDW_COLORS, \ + .page_sizes = I915_GTT_PAGE_SIZE_4K | \ + I915_GTT_PAGE_SIZE_2M, \ .has_logical_ring_contexts = 1, \ .has_full_48bit_ppgtt = 1, \ .has_64bit_reloc = 1, \ .has_reset_engine = 1 #define BDW_PLATFORM \ - BDW_FEATURES, \ + GEN8_FEATURES, \ .gen = 8, \ .platform = INTEL_BROADWELL @@ -415,19 +430,31 @@ static const struct intel_device_info intel_cherryview_info __initconst = { .has_reset_engine = 1, .has_snoop = true, .display_mmio_offset = VLV_DISPLAY_BASE, + GEN_DEFAULT_PAGE_SIZES, GEN_CHV_PIPEOFFSETS, CURSOR_OFFSETS, CHV_COLORS, }; -#define SKL_PLATFORM \ - BDW_FEATURES, \ - .gen = 9, \ - .platform = INTEL_SKYLAKE, \ +#define GEN9_DEFAULT_PAGE_SIZES \ + .page_sizes = I915_GTT_PAGE_SIZE_4K | \ + I915_GTT_PAGE_SIZE_64K | \ + I915_GTT_PAGE_SIZE_2M + +#define GEN9_FEATURES \ + GEN8_FEATURES, \ + GEN9_DEFAULT_PAGE_SIZES, \ + .has_logical_ring_preemption = 1, \ .has_csr = 1, \ .has_guc = 1, \ + .has_ipc = 1, \ .ddb_size = 896 +#define SKL_PLATFORM \ + GEN9_FEATURES, \ + .gen = 9, \ + .platform = INTEL_SKYLAKE + static const struct intel_device_info intel_skylake_gt1_info __initconst = { SKL_PLATFORM, .gt = 1, @@ -463,6 +490,7 @@ static const struct intel_device_info intel_skylake_gt4_info __initconst = { .has_ddi = 1, \ .has_fpga_dbg = 1, \ .has_fbc = 1, \ + .has_psr = 1, \ .has_runtime_pm = 1, \ .has_pooled_eu = 0, \ .has_csr = 1, \ @@ -470,6 +498,7 @@ static const struct intel_device_info intel_skylake_gt4_info __initconst = { .has_rc6 = 1, \ .has_dp_mst = 1, \ .has_logical_ring_contexts = 1, \ + .has_logical_ring_preemption = 1, \ .has_guc = 1, \ .has_aliasing_ppgtt = 1, \ .has_full_ppgtt = 1, \ @@ -477,6 +506,7 @@ static const struct intel_device_info intel_skylake_gt4_info __initconst = { .has_reset_engine = 1, \ .has_snoop = true, \ .has_ipc = 1, \ + GEN9_DEFAULT_PAGE_SIZES, \ GEN_DEFAULT_PIPEOFFSETS, \ IVB_CURSOR_OFFSETS, \ BDW_COLORS @@ -491,17 +521,13 @@ static const struct intel_device_info intel_geminilake_info __initconst = { GEN9_LP_FEATURES, .platform = INTEL_GEMINILAKE, .ddb_size = 1024, - .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } + GLK_COLORS, }; #define KBL_PLATFORM \ - BDW_FEATURES, \ + GEN9_FEATURES, \ .gen = 9, \ - .platform = INTEL_KABYLAKE, \ - .has_csr = 1, \ - .has_guc = 1, \ - .has_ipc = 1, \ - .ddb_size = 896 + .platform = INTEL_KABYLAKE static const struct intel_device_info intel_kabylake_gt1_info __initconst = { KBL_PLATFORM, @@ -520,13 +546,9 @@ static const struct intel_device_info intel_kabylake_gt3_info __initconst = { }; #define CFL_PLATFORM \ - BDW_FEATURES, \ + GEN9_FEATURES, \ .gen = 9, \ - .platform = INTEL_COFFEELAKE, \ - .has_csr = 1, \ - .has_guc = 1, \ - .has_ipc = 1, \ - .ddb_size = 896 + .platform = INTEL_COFFEELAKE static const struct intel_device_info intel_coffeelake_gt1_info __initconst = { CFL_PLATFORM, @@ -544,16 +566,17 @@ static const struct intel_device_info intel_coffeelake_gt3_info __initconst = { .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, }; +#define GEN10_FEATURES \ + GEN9_FEATURES, \ + .ddb_size = 1024, \ + GLK_COLORS + static const struct intel_device_info intel_cannonlake_gt2_info __initconst = { - BDW_FEATURES, + GEN10_FEATURES, .is_alpha_support = 1, .platform = INTEL_CANNONLAKE, .gen = 10, .gt = 2, - .ddb_size = 1024, - .has_csr = 1, - .has_ipc = 1, - .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } }; /* diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ee0d4f14ac98..d2d0a83c09b6 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2371,6 +2371,9 @@ enum i915_power_well_id { #define GEN9_GAMT_ECO_REG_RW_IA _MMIO(0x4ab0) #define GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS (1<<18) +#define GEN8_GAMW_ECO_DEV_RW_IA _MMIO(0x4080) +#define GAMW_ECO_ENABLE_64K_IPS_FIELD 0xF + #define GAMT_CHKN_BIT_REG _MMIO(0x4ab8) #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1<<28) #define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1<<24) @@ -3819,6 +3822,16 @@ enum { #define PWM2_GATING_DIS (1 << 14) #define PWM1_GATING_DIS (1 << 13) +#define _CLKGATE_DIS_PSL_A 0x46520 +#define _CLKGATE_DIS_PSL_B 0x46524 +#define _CLKGATE_DIS_PSL_C 0x46528 +#define DPF_GATING_DIS (1 << 10) +#define DPF_RAM_GATING_DIS (1 << 9) +#define DPFR_GATING_DIS (1 << 8) + +#define CLKGATE_DIS_PSL(pipe) \ + _MMIO_PIPE(pipe, _CLKGATE_DIS_PSL_A, _CLKGATE_DIS_PSL_B) + /* * GEN10 clock gating regs */ @@ -5671,8 +5684,7 @@ enum { #define CBR_PWM_CLOCK_MUX_SELECT (1<<30) #define CBR4_VLV _MMIO(VLV_DISPLAY_BASE + 0x70450) -#define CBR_DPLLBMD_PIPE_C (1<<29) -#define CBR_DPLLBMD_PIPE_B (1<<18) +#define CBR_DPLLBMD_PIPE(pipe) (1<<(7+(pipe)*11)) /* pipes B and C */ /* FIFO watermark sizes etc */ #define G4X_FIFO_LINE_SIZE 64 @@ -6993,6 +7005,12 @@ enum { #define GEN9_CS_DEBUG_MODE1 _MMIO(0x20ec) #define GEN9_CTX_PREEMPT_REG _MMIO(0x2248) #define GEN8_CS_CHICKEN1 _MMIO(0x2580) +#define GEN9_PREEMPT_3D_OBJECT_LEVEL (1<<0) +#define GEN9_PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1)) +#define GEN9_PREEMPT_GPGPU_MID_THREAD_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(0, 0) +#define GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(0, 1) +#define GEN9_PREEMPT_GPGPU_COMMAND_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(1, 0) +#define GEN9_PREEMPT_GPGPU_LEVEL_MASK GEN9_PREEMPT_GPGPU_LEVEL(1, 1) /* GEN7 chicken */ #define GEN7_COMMON_SLICE_CHICKEN1 _MMIO(0x7010) @@ -7164,9 +7182,6 @@ enum { #define SERR_INT _MMIO(0xc4040) #define SERR_INT_POISON (1<<31) -#define SERR_INT_TRANS_C_FIFO_UNDERRUN (1<<6) -#define SERR_INT_TRANS_B_FIFO_UNDERRUN (1<<3) -#define SERR_INT_TRANS_A_FIFO_UNDERRUN (1<<0) #define SERR_INT_TRANS_FIFO_UNDERRUN(pipe) (1<<((pipe)*3)) /* digital port hotplug */ diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 5c86925a0294..8f3aa4dc0c98 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -108,8 +108,6 @@ int i915_restore_state(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); - i915_gem_restore_fences(dev_priv); - if (IS_GEN4(dev_priv)) pci_write_config_word(pdev, GCDGMBUS, dev_priv->regfile.saveGCDGMBUS); diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index d61c8727f756..791759f632e1 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -49,7 +49,7 @@ static u32 calc_residency(struct drm_i915_private *dev_priv, static ssize_t show_rc6_mask(struct device *kdev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%x\n", intel_enable_rc6()); + return snprintf(buf, PAGE_SIZE, "%x\n", intel_rc6_enabled()); } static ssize_t @@ -246,7 +246,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { u32 freq; freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); @@ -261,7 +261,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, ret = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; ret = intel_gpu_freq(dev_priv, ret); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); @@ -275,7 +275,7 @@ static ssize_t gt_cur_freq_mhz_show(struct device *kdev, return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, - dev_priv->rps.cur_freq)); + dev_priv->gt_pm.rps.cur_freq)); } static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -284,7 +284,7 @@ static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribu return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, - dev_priv->rps.boost_freq)); + dev_priv->gt_pm.rps.boost_freq)); } static ssize_t gt_boost_freq_mhz_store(struct device *kdev, @@ -292,6 +292,7 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; ssize_t ret; @@ -301,12 +302,12 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, /* Validate against (static) hardware limits */ val = intel_freq_opcode(dev_priv, val); - if (val < dev_priv->rps.min_freq || val > dev_priv->rps.max_freq) + if (val < rps->min_freq || val > rps->max_freq) return -EINVAL; - mutex_lock(&dev_priv->rps.hw_lock); - dev_priv->rps.boost_freq = val; - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); + rps->boost_freq = val; + mutex_unlock(&dev_priv->pcu_lock); return count; } @@ -318,7 +319,7 @@ static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, - dev_priv->rps.efficient_freq)); + dev_priv->gt_pm.rps.efficient_freq)); } static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -327,7 +328,7 @@ static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, - dev_priv->rps.max_freq_softlimit)); + dev_priv->gt_pm.rps.max_freq_softlimit)); } static ssize_t gt_max_freq_mhz_store(struct device *kdev, @@ -335,6 +336,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; ssize_t ret; @@ -344,34 +346,34 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = intel_freq_opcode(dev_priv, val); - if (val < dev_priv->rps.min_freq || - val > dev_priv->rps.max_freq || - val < dev_priv->rps.min_freq_softlimit) { - mutex_unlock(&dev_priv->rps.hw_lock); + if (val < rps->min_freq || + val > rps->max_freq || + val < rps->min_freq_softlimit) { + mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); return -EINVAL; } - if (val > dev_priv->rps.rp0_freq) + if (val > rps->rp0_freq) DRM_DEBUG("User requested overclocking to %d\n", intel_gpu_freq(dev_priv, val)); - dev_priv->rps.max_freq_softlimit = val; + rps->max_freq_softlimit = val; - val = clamp_t(int, dev_priv->rps.cur_freq, - dev_priv->rps.min_freq_softlimit, - dev_priv->rps.max_freq_softlimit); + val = clamp_t(int, rps->cur_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit); /* We still need *_set_rps to process the new max_delay and * update the interrupt limits and PMINTRMSK even though * frequency request may be unchanged. */ ret = intel_set_rps(dev_priv, val); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); @@ -384,7 +386,7 @@ static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, - dev_priv->rps.min_freq_softlimit)); + dev_priv->gt_pm.rps.min_freq_softlimit)); } static ssize_t gt_min_freq_mhz_store(struct device *kdev, @@ -392,6 +394,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; ssize_t ret; @@ -401,30 +404,30 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = intel_freq_opcode(dev_priv, val); - if (val < dev_priv->rps.min_freq || - val > dev_priv->rps.max_freq || - val > dev_priv->rps.max_freq_softlimit) { - mutex_unlock(&dev_priv->rps.hw_lock); + if (val < rps->min_freq || + val > rps->max_freq || + val > rps->max_freq_softlimit) { + mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); return -EINVAL; } - dev_priv->rps.min_freq_softlimit = val; + rps->min_freq_softlimit = val; - val = clamp_t(int, dev_priv->rps.cur_freq, - dev_priv->rps.min_freq_softlimit, - dev_priv->rps.max_freq_softlimit); + val = clamp_t(int, rps->cur_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit); /* We still need *_set_rps to process the new min_delay and * update the interrupt limits and PMINTRMSK even though * frequency request may be unchanged. */ ret = intel_set_rps(dev_priv, val); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); @@ -448,14 +451,15 @@ static DEVICE_ATTR(gt_RPn_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL); static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; if (attr == &dev_attr_gt_RP0_freq_mhz) - val = intel_gpu_freq(dev_priv, dev_priv->rps.rp0_freq); + val = intel_gpu_freq(dev_priv, rps->rp0_freq); else if (attr == &dev_attr_gt_RP1_freq_mhz) - val = intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq); + val = intel_gpu_freq(dev_priv, rps->rp1_freq); else if (attr == &dev_attr_gt_RPn_freq_mhz) - val = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq); + val = intel_gpu_freq(dev_priv, rps->min_freq); else BUG(); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 92f4c5bb7aa7..9cab91ddeb79 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -345,7 +345,7 @@ TRACE_EVENT(i915_gem_object_create, TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) - __field(u32, size) + __field(u64, size) ), TP_fast_assign( @@ -353,7 +353,7 @@ TRACE_EVENT(i915_gem_object_create, __entry->size = obj->base.size; ), - TP_printk("obj=%p, size=%u", __entry->obj, __entry->size) + TP_printk("obj=%p, size=0x%llx", __entry->obj, __entry->size) ); TRACE_EVENT(i915_gem_shrink, @@ -384,7 +384,7 @@ TRACE_EVENT(i915_vma_bind, __field(struct drm_i915_gem_object *, obj) __field(struct i915_address_space *, vm) __field(u64, offset) - __field(u32, size) + __field(u64, size) __field(unsigned, flags) ), @@ -396,7 +396,7 @@ TRACE_EVENT(i915_vma_bind, __entry->flags = flags; ), - TP_printk("obj=%p, offset=%016llx size=%x%s vm=%p", + TP_printk("obj=%p, offset=0x%016llx size=0x%llx%s vm=%p", __entry->obj, __entry->offset, __entry->size, __entry->flags & PIN_MAPPABLE ? ", mappable" : "", __entry->vm) @@ -410,7 +410,7 @@ TRACE_EVENT(i915_vma_unbind, __field(struct drm_i915_gem_object *, obj) __field(struct i915_address_space *, vm) __field(u64, offset) - __field(u32, size) + __field(u64, size) ), TP_fast_assign( @@ -420,18 +420,18 @@ TRACE_EVENT(i915_vma_unbind, __entry->size = vma->node.size; ), - TP_printk("obj=%p, offset=%016llx size=%x vm=%p", + TP_printk("obj=%p, offset=0x%016llx size=0x%llx vm=%p", __entry->obj, __entry->offset, __entry->size, __entry->vm) ); TRACE_EVENT(i915_gem_object_pwrite, - TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len), + TP_PROTO(struct drm_i915_gem_object *obj, u64 offset, u64 len), TP_ARGS(obj, offset, len), TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) - __field(u32, offset) - __field(u32, len) + __field(u64, offset) + __field(u64, len) ), TP_fast_assign( @@ -440,18 +440,18 @@ TRACE_EVENT(i915_gem_object_pwrite, __entry->len = len; ), - TP_printk("obj=%p, offset=%u, len=%u", + TP_printk("obj=%p, offset=0x%llx, len=0x%llx", __entry->obj, __entry->offset, __entry->len) ); TRACE_EVENT(i915_gem_object_pread, - TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len), + TP_PROTO(struct drm_i915_gem_object *obj, u64 offset, u64 len), TP_ARGS(obj, offset, len), TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) - __field(u32, offset) - __field(u32, len) + __field(u64, offset) + __field(u64, len) ), TP_fast_assign( @@ -460,17 +460,17 @@ TRACE_EVENT(i915_gem_object_pread, __entry->len = len; ), - TP_printk("obj=%p, offset=%u, len=%u", + TP_printk("obj=%p, offset=0x%llx, len=0x%llx", __entry->obj, __entry->offset, __entry->len) ); TRACE_EVENT(i915_gem_object_fault, - TP_PROTO(struct drm_i915_gem_object *obj, u32 index, bool gtt, bool write), + TP_PROTO(struct drm_i915_gem_object *obj, u64 index, bool gtt, bool write), TP_ARGS(obj, index, gtt, write), TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) - __field(u32, index) + __field(u64, index) __field(bool, gtt) __field(bool, write) ), @@ -482,7 +482,7 @@ TRACE_EVENT(i915_gem_object_fault, __entry->write = write; ), - TP_printk("obj=%p, %s index=%u %s", + TP_printk("obj=%p, %s index=%llu %s", __entry->obj, __entry->gtt ? "GTT" : "CPU", __entry->index, @@ -515,14 +515,14 @@ DEFINE_EVENT(i915_gem_object, i915_gem_object_destroy, ); TRACE_EVENT(i915_gem_evict, - TP_PROTO(struct i915_address_space *vm, u32 size, u32 align, unsigned int flags), + TP_PROTO(struct i915_address_space *vm, u64 size, u64 align, unsigned int flags), TP_ARGS(vm, size, align, flags), TP_STRUCT__entry( __field(u32, dev) __field(struct i915_address_space *, vm) - __field(u32, size) - __field(u32, align) + __field(u64, size) + __field(u64, align) __field(unsigned int, flags) ), @@ -534,43 +534,11 @@ TRACE_EVENT(i915_gem_evict, __entry->flags = flags; ), - TP_printk("dev=%d, vm=%p, size=%d, align=%d %s", + TP_printk("dev=%d, vm=%p, size=0x%llx, align=0x%llx %s", __entry->dev, __entry->vm, __entry->size, __entry->align, __entry->flags & PIN_MAPPABLE ? ", mappable" : "") ); -TRACE_EVENT(i915_gem_evict_everything, - TP_PROTO(struct drm_device *dev), - TP_ARGS(dev), - - TP_STRUCT__entry( - __field(u32, dev) - ), - - TP_fast_assign( - __entry->dev = dev->primary->index; - ), - - TP_printk("dev=%d", __entry->dev) -); - -TRACE_EVENT(i915_gem_evict_vm, - TP_PROTO(struct i915_address_space *vm), - TP_ARGS(vm), - - TP_STRUCT__entry( - __field(u32, dev) - __field(struct i915_address_space *, vm) - ), - - TP_fast_assign( - __entry->dev = vm->i915->drm.primary->index; - __entry->vm = vm; - ), - - TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm) -); - TRACE_EVENT(i915_gem_evict_node, TP_PROTO(struct i915_address_space *vm, struct drm_mm_node *node, unsigned int flags), TP_ARGS(vm, node, flags), @@ -593,12 +561,29 @@ TRACE_EVENT(i915_gem_evict_node, __entry->flags = flags; ), - TP_printk("dev=%d, vm=%p, start=%llx size=%llx, color=%lx, flags=%x", + TP_printk("dev=%d, vm=%p, start=0x%llx size=0x%llx, color=0x%lx, flags=%x", __entry->dev, __entry->vm, __entry->start, __entry->size, __entry->color, __entry->flags) ); +TRACE_EVENT(i915_gem_evict_vm, + TP_PROTO(struct i915_address_space *vm), + TP_ARGS(vm), + + TP_STRUCT__entry( + __field(u32, dev) + __field(struct i915_address_space *, vm) + ), + + TP_fast_assign( + __entry->dev = vm->i915->drm.primary->index; + __entry->vm = vm; + ), + + TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm) +); + TRACE_EVENT(i915_gem_ring_sync_to, TP_PROTO(struct drm_i915_gem_request *to, struct drm_i915_gem_request *from), @@ -649,29 +634,6 @@ TRACE_EVENT(i915_gem_request_queue, __entry->flags) ); -TRACE_EVENT(i915_gem_ring_flush, - TP_PROTO(struct drm_i915_gem_request *req, u32 invalidate, u32 flush), - TP_ARGS(req, invalidate, flush), - - TP_STRUCT__entry( - __field(u32, dev) - __field(u32, ring) - __field(u32, invalidate) - __field(u32, flush) - ), - - TP_fast_assign( - __entry->dev = req->i915->drm.primary->index; - __entry->ring = req->engine->id; - __entry->invalidate = invalidate; - __entry->flush = flush; - ), - - TP_printk("dev=%u, ring=%x, invalidate=%04x, flush=%04x", - __entry->dev, __entry->ring, - __entry->invalidate, __entry->flush) -); - DECLARE_EVENT_CLASS(i915_gem_request, TP_PROTO(struct drm_i915_gem_request *req), TP_ARGS(req), diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 12fc250b47b9..af3d7cc53fa1 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -99,6 +99,11 @@ __T; \ }) +static inline u64 ptr_to_u64(const void *ptr) +{ + return (uintptr_t)ptr; +} + #define u64_to_ptr(T, x) ({ \ typecheck(u64, x); \ (T *)(uintptr_t)(x); \ @@ -119,4 +124,17 @@ static inline void __list_del_many(struct list_head *head, WRITE_ONCE(head->next, first); } +/* + * Wait until the work is finally complete, even if it tries to postpone + * by requeueing itself. Note, that if the worker never cancels itself, + * we will spin forever. + */ +static inline void drain_delayed_work(struct delayed_work *dw) +{ + do { + while (flush_delayed_work(dw)) + ; + } while (delayed_work_pending(dw)); +} + #endif /* !__I915_UTILS_H */ diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 02d1a5eacb00..4dce2e0197d9 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -266,6 +266,8 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, if (bind_flags == 0) return 0; + GEM_BUG_ON(!vma->pages); + trace_i915_vma_bind(vma, bind_flags); ret = vma->vm->bind_vma(vma, cache_level, bind_flags); if (ret) @@ -278,13 +280,16 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) { void __iomem *ptr; + int err; /* Access through the GTT requires the device to be awake. */ assert_rpm_wakelock_held(vma->vm->i915); lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) - return IO_ERR_PTR(-ENODEV); + if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { + err = -ENODEV; + goto err; + } GEM_BUG_ON(!i915_vma_is_ggtt(vma)); GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); @@ -294,14 +299,36 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable, vma->node.start, vma->node.size); - if (ptr == NULL) - return IO_ERR_PTR(-ENOMEM); + if (ptr == NULL) { + err = -ENOMEM; + goto err; + } vma->iomap = ptr; } __i915_vma_pin(vma); + + err = i915_vma_pin_fence(vma); + if (err) + goto err_unpin; + return ptr; + +err_unpin: + __i915_vma_unpin(vma); +err: + return IO_ERR_PTR(err); +} + +void i915_vma_unpin_iomap(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->obj->base.dev->struct_mutex); + + GEM_BUG_ON(vma->iomap == NULL); + + i915_vma_unpin_fence(vma); + i915_vma_unpin(vma); } void i915_vma_unpin_and_release(struct i915_vma **p_vma) @@ -471,25 +498,64 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) if (ret) return ret; + GEM_BUG_ON(vma->pages); + + ret = vma->vm->set_pages(vma); + if (ret) + goto err_unpin; + if (flags & PIN_OFFSET_FIXED) { u64 offset = flags & PIN_OFFSET_MASK; if (!IS_ALIGNED(offset, alignment) || range_overflows(offset, size, end)) { ret = -EINVAL; - goto err_unpin; + goto err_clear; } ret = i915_gem_gtt_reserve(vma->vm, &vma->node, size, offset, obj->cache_level, flags); if (ret) - goto err_unpin; + goto err_clear; } else { + /* + * We only support huge gtt pages through the 48b PPGTT, + * however we also don't want to force any alignment for + * objects which need to be tightly packed into the low 32bits. + * + * Note that we assume that GGTT are limited to 4GiB for the + * forseeable future. See also i915_ggtt_offset(). + */ + if (upper_32_bits(end - 1) && + vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { + /* + * We can't mix 64K and 4K PTEs in the same page-table + * (2M block), and so to avoid the ugliness and + * complexity of coloring we opt for just aligning 64K + * objects to 2M. + */ + u64 page_alignment = + rounddown_pow_of_two(vma->page_sizes.sg | + I915_GTT_PAGE_SIZE_2M); + + /* + * Check we don't expand for the limited Global GTT + * (mappable aperture is even more precious!). This + * also checks that we exclude the aliasing-ppgtt. + */ + GEM_BUG_ON(i915_vma_is_ggtt(vma)); + + alignment = max(alignment, page_alignment); + + if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) + size = round_up(size, I915_GTT_PAGE_SIZE_2M); + } + ret = i915_gem_gtt_insert(vma->vm, &vma->node, size, alignment, obj->cache_level, start, end, flags); if (ret) - goto err_unpin; + goto err_clear; GEM_BUG_ON(vma->node.start < start); GEM_BUG_ON(vma->node.start + vma->node.size > end); @@ -504,6 +570,8 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) return 0; +err_clear: + vma->vm->clear_pages(vma); err_unpin: i915_gem_object_unpin_pages(obj); return ret; @@ -517,6 +585,8 @@ i915_vma_remove(struct i915_vma *vma) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); + vma->vm->clear_pages(vma); + drm_mm_remove_node(&vma->node); list_move_tail(&vma->vm_link, &vma->vm->unbound_list); @@ -569,8 +639,8 @@ int __i915_vma_do_pin(struct i915_vma *vma, err_remove: if ((bound & I915_VMA_BIND_MASK) == 0) { - GEM_BUG_ON(vma->pages); i915_vma_remove(vma); + GEM_BUG_ON(vma->pages); } err_unpin: __i915_vma_unpin(vma); @@ -620,6 +690,30 @@ static void __i915_vma_iounmap(struct i915_vma *vma) vma->iomap = NULL; } +void i915_vma_revoke_mmap(struct i915_vma *vma) +{ + struct drm_vma_offset_node *node = &vma->obj->base.vma_node; + u64 vma_offset; + + lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + + if (!i915_vma_has_userfault(vma)) + return; + + GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); + GEM_BUG_ON(!vma->obj->userfault_count); + + vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT; + unmap_mapping_range(vma->vm->i915->drm.anon_inode->i_mapping, + drm_vma_node_offset_addr(node) + vma_offset, + vma->size, + 1); + + i915_vma_unset_userfault(vma); + if (!--vma->obj->userfault_count) + list_del(&vma->obj->userfault_link); +} + int i915_vma_unbind(struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; @@ -683,11 +777,13 @@ int i915_vma_unbind(struct i915_vma *vma) return ret; /* Force a pagefault for domain tracking on next user access */ - i915_gem_release_mmap(obj); + i915_vma_revoke_mmap(vma); __i915_vma_iounmap(vma); vma->flags &= ~I915_VMA_CAN_FENCE; } + GEM_BUG_ON(vma->fence); + GEM_BUG_ON(i915_vma_has_userfault(vma)); if (likely(!vma->vm->closed)) { trace_i915_vma_unbind(vma); @@ -695,13 +791,6 @@ int i915_vma_unbind(struct i915_vma *vma) } vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); - if (vma->pages != obj->mm.pages) { - GEM_BUG_ON(!vma->pages); - sg_free_table(vma->pages); - kfree(vma->pages); - } - vma->pages = NULL; - i915_vma_remove(vma); destroy: diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index e811067c7724..1e2bc9b3c3ac 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -55,6 +55,7 @@ struct i915_vma { void __iomem *iomap; u64 size; u64 display_alignment; + struct i915_page_sizes page_sizes; u32 fence_size; u32 fence_alignment; @@ -65,7 +66,7 @@ struct i915_vma { * that exist in the ctx->handle_vmas LUT for this vma. */ unsigned int open_count; - unsigned int flags; + unsigned long flags; /** * How many users have pinned this object in GTT space. The following * users can each hold at most one reference: pwrite/pread, execbuffer @@ -87,6 +88,8 @@ struct i915_vma { #define I915_VMA_GGTT BIT(8) #define I915_VMA_CAN_FENCE BIT(9) #define I915_VMA_CLOSED BIT(10) +#define I915_VMA_USERFAULT_BIT 11 +#define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT) unsigned int active; struct i915_gem_active last_read[I915_NUM_ENGINES]; @@ -145,6 +148,22 @@ static inline bool i915_vma_is_closed(const struct i915_vma *vma) return vma->flags & I915_VMA_CLOSED; } +static inline bool i915_vma_set_userfault(struct i915_vma *vma) +{ + GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); + return __test_and_set_bit(I915_VMA_USERFAULT_BIT, &vma->flags); +} + +static inline void i915_vma_unset_userfault(struct i915_vma *vma) +{ + return __clear_bit(I915_VMA_USERFAULT_BIT, &vma->flags); +} + +static inline bool i915_vma_has_userfault(const struct i915_vma *vma) +{ + return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags); +} + static inline unsigned int i915_vma_get_active(const struct i915_vma *vma) { return vma->active; @@ -243,6 +262,7 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level); bool i915_vma_misplaced(const struct i915_vma *vma, u64 size, u64 alignment, u64 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); +void i915_vma_revoke_mmap(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); void i915_vma_unlink_ctx(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); @@ -321,12 +341,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); * Callers must hold the struct_mutex. This function is only valid to be * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap(). */ -static inline void i915_vma_unpin_iomap(struct i915_vma *vma) -{ - lockdep_assert_held(&vma->obj->base.dev->struct_mutex); - GEM_BUG_ON(vma->iomap == NULL); - i915_vma_unpin(vma); -} +void i915_vma_unpin_iomap(struct i915_vma *vma); static inline struct page *i915_vma_first_page(struct i915_vma *vma) { @@ -349,15 +364,13 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma) * * True if the vma has a fence, false otherwise. */ -static inline bool -i915_vma_pin_fence(struct i915_vma *vma) +int i915_vma_pin_fence(struct i915_vma *vma); +int __must_check i915_vma_put_fence(struct i915_vma *vma); + +static inline void __i915_vma_unpin_fence(struct i915_vma *vma) { - lockdep_assert_held(&vma->obj->base.dev->struct_mutex); - if (vma->fence) { - vma->fence->pin_count++; - return true; - } else - return false; + GEM_BUG_ON(vma->fence->pin_count <= 0); + vma->fence->pin_count--; } /** @@ -372,10 +385,8 @@ static inline void i915_vma_unpin_fence(struct i915_vma *vma) { lockdep_assert_held(&vma->obj->base.dev->struct_mutex); - if (vma->fence) { - GEM_BUG_ON(vma->fence->pin_count <= 0); - vma->fence->pin_count--; - } + if (vma->fence) + __i915_vma_unpin_fence(vma); } #endif diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 27743be5b768..0ddba16fde1b 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -754,7 +754,7 @@ static struct intel_encoder *get_saved_enc(struct drm_i915_private *dev_priv, { struct intel_encoder *encoder; - if (WARN_ON(pipe >= I915_MAX_PIPES)) + if (WARN_ON(pipe >= INTEL_INFO(dev_priv)->num_pipes)) return NULL; /* MST */ diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index b881ce8596ee..9d5b42953436 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -431,27 +431,6 @@ parse_general_features(struct drm_i915_private *dev_priv, dev_priv->vbt.fdi_rx_polarity_inverted); } -static void -parse_general_definitions(struct drm_i915_private *dev_priv, - const struct bdb_header *bdb) -{ - const struct bdb_general_definitions *general; - - general = find_section(bdb, BDB_GENERAL_DEFINITIONS); - if (general) { - u16 block_size = get_blocksize(general); - if (block_size >= sizeof(*general)) { - int bus_pin = general->crt_ddc_gmbus_pin; - DRM_DEBUG_KMS("crt_ddc_bus_pin: %d\n", bus_pin); - if (intel_gmbus_is_valid_pin(dev_priv, bus_pin)) - dev_priv->vbt.crt_ddc_pin = bus_pin; - } else { - DRM_DEBUG_KMS("BDB_GD too small (%d). Invalid.\n", - block_size); - } - } -} - static const struct child_device_config * child_device_ptr(const struct bdb_general_definitions *defs, int i) { @@ -459,41 +438,24 @@ child_device_ptr(const struct bdb_general_definitions *defs, int i) } static void -parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, - const struct bdb_header *bdb) +parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, u8 bdb_version) { struct sdvo_device_mapping *mapping; - const struct bdb_general_definitions *defs; const struct child_device_config *child; - int i, child_device_num, count; - u16 block_size; - - defs = find_section(bdb, BDB_GENERAL_DEFINITIONS); - if (!defs) { - DRM_DEBUG_KMS("No general definition block is found, unable to construct sdvo mapping.\n"); - return; - } + int i, count = 0; /* - * Only parse SDVO mappings when the general definitions block child - * device size matches that of the *legacy* child device config - * struct. Thus, SDVO mapping will be skipped for newer VBT. + * Only parse SDVO mappings on gens that could have SDVO. This isn't + * accurate and doesn't have to be, as long as it's not too strict. */ - if (defs->child_dev_size != LEGACY_CHILD_DEVICE_CONFIG_SIZE) { - DRM_DEBUG_KMS("Unsupported child device size for SDVO mapping.\n"); + if (!IS_GEN(dev_priv, 3, 7)) { + DRM_DEBUG_KMS("Skipping SDVO device mapping\n"); return; } - /* get the block size of general definitions */ - block_size = get_blocksize(defs); - /* get the number of child device */ - child_device_num = (block_size - sizeof(*defs)) / defs->child_dev_size; - count = 0; - for (i = 0; i < child_device_num; i++) { - child = child_device_ptr(defs, i); - if (!child->device_type) { - /* skip the device block if device type is invalid */ - continue; - } + + for (i = 0, count = 0; i < dev_priv->vbt.child_dev_num; i++) { + child = dev_priv->vbt.child_dev + i; + if (child->slave_addr != SLAVE_ADDR1 && child->slave_addr != SLAVE_ADDR2) { /* @@ -544,7 +506,6 @@ parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, /* No SDVO device info is found */ DRM_DEBUG_KMS("No SDVO device info is found in VBT\n"); } - return; } static void @@ -1111,7 +1072,7 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv, } static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, - const struct bdb_header *bdb) + u8 bdb_version) { struct child_device_config *it, *child = NULL; struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port]; @@ -1215,7 +1176,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, sanitize_aux_ch(dev_priv, port); } - if (bdb->version >= 158) { + if (bdb_version >= 158) { /* The VBT HDMI level shift values match the table we have. */ hdmi_level_shift = child->hdmi_level_shifter_value; DRM_DEBUG_KMS("VBT HDMI level shift for port %c: %d\n", @@ -1225,7 +1186,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, } /* Parse the I_boost config for SKL and above */ - if (bdb->version >= 196 && child->iboost) { + if (bdb_version >= 196 && child->iboost) { info->dp_boost_level = translate_iboost(child->dp_iboost_level); DRM_DEBUG_KMS("VBT (e)DP boost level for port %c: %d\n", port_name(port), info->dp_boost_level); @@ -1235,40 +1196,52 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, } } -static void parse_ddi_ports(struct drm_i915_private *dev_priv, - const struct bdb_header *bdb) +static void parse_ddi_ports(struct drm_i915_private *dev_priv, u8 bdb_version) { enum port port; - if (!HAS_DDI(dev_priv)) + if (!HAS_DDI(dev_priv) && !IS_CHERRYVIEW(dev_priv)) return; if (!dev_priv->vbt.child_dev_num) return; - if (bdb->version < 155) + if (bdb_version < 155) return; for (port = PORT_A; port < I915_MAX_PORTS; port++) - parse_ddi_port(dev_priv, port, bdb); + parse_ddi_port(dev_priv, port, bdb_version); } static void -parse_device_mapping(struct drm_i915_private *dev_priv, - const struct bdb_header *bdb) +parse_general_definitions(struct drm_i915_private *dev_priv, + const struct bdb_header *bdb) { const struct bdb_general_definitions *defs; const struct child_device_config *child; - struct child_device_config *child_dev_ptr; int i, child_device_num, count; u8 expected_size; u16 block_size; + int bus_pin; defs = find_section(bdb, BDB_GENERAL_DEFINITIONS); if (!defs) { DRM_DEBUG_KMS("No general definition block is found, no devices defined.\n"); return; } + + block_size = get_blocksize(defs); + if (block_size < sizeof(*defs)) { + DRM_DEBUG_KMS("General definitions block too small (%u)\n", + block_size); + return; + } + + bus_pin = defs->crt_ddc_gmbus_pin; + DRM_DEBUG_KMS("crt_ddc_bus_pin: %d\n", bus_pin); + if (intel_gmbus_is_valid_pin(dev_priv, bus_pin)) + dev_priv->vbt.crt_ddc_pin = bus_pin; + if (bdb->version < 106) { expected_size = 22; } else if (bdb->version < 111) { @@ -1298,18 +1271,14 @@ parse_device_mapping(struct drm_i915_private *dev_priv, return; } - /* get the block size of general definitions */ - block_size = get_blocksize(defs); /* get the number of child device */ child_device_num = (block_size - sizeof(*defs)) / defs->child_dev_size; count = 0; /* get the number of child device that is present */ for (i = 0; i < child_device_num; i++) { child = child_device_ptr(defs, i); - if (!child->device_type) { - /* skip the device block if device type is invalid */ + if (!child->device_type) continue; - } count++; } if (!count) { @@ -1326,36 +1295,18 @@ parse_device_mapping(struct drm_i915_private *dev_priv, count = 0; for (i = 0; i < child_device_num; i++) { child = child_device_ptr(defs, i); - if (!child->device_type) { - /* skip the device block if device type is invalid */ + if (!child->device_type) continue; - } - - child_dev_ptr = dev_priv->vbt.child_dev + count; - count++; /* * Copy as much as we know (sizeof) and is available * (child_dev_size) of the child device. Accessing the data must * depend on VBT version. */ - memcpy(child_dev_ptr, child, + memcpy(dev_priv->vbt.child_dev + count, child, min_t(size_t, defs->child_dev_size, sizeof(*child))); - - /* - * copied full block, now init values when they are not - * available in current version - */ - if (bdb->version < 196) { - /* Set default values for bits added from v196 */ - child_dev_ptr->iboost = 0; - child_dev_ptr->hpd_invert = 0; - } - - if (bdb->version < 192) - child_dev_ptr->lspcon = 0; + count++; } - return; } /* Common defaults which may be overridden by VBT. */ @@ -1536,14 +1487,15 @@ void intel_bios_init(struct drm_i915_private *dev_priv) parse_lfp_panel_data(dev_priv, bdb); parse_lfp_backlight(dev_priv, bdb); parse_sdvo_panel_data(dev_priv, bdb); - parse_sdvo_device_mapping(dev_priv, bdb); - parse_device_mapping(dev_priv, bdb); parse_driver_features(dev_priv, bdb); parse_edp(dev_priv, bdb); parse_psr(dev_priv, bdb); parse_mipi_config(dev_priv, bdb); parse_mipi_sequence(dev_priv, bdb); - parse_ddi_ports(dev_priv, bdb); + + /* Further processing on pre-parsed data */ + parse_sdvo_device_mapping(dev_priv, bdb->version); + parse_ddi_ports(dev_priv, bdb->version); out: if (!vbt) { diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 87fc42b19336..b2a6d62b71c0 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -503,7 +503,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, else cmd = 0; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); val &= ~DSPFREQGUAR_MASK; val |= (cmd << DSPFREQGUAR_SHIFT); @@ -513,7 +513,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, 50)) { DRM_ERROR("timed out waiting for CDclk change\n"); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); mutex_lock(&dev_priv->sb_lock); @@ -590,7 +590,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, */ cmd = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); val &= ~DSPFREQGUAR_MASK_CHV; val |= (cmd << DSPFREQGUAR_SHIFT_CHV); @@ -600,7 +600,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, 50)) { DRM_ERROR("timed out waiting for CDclk change\n"); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); @@ -656,10 +656,10 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, "trying to change cdclk frequency with cdclk not enabled\n")) return; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("failed to inform pcode about cdclk change\n"); return; @@ -712,9 +712,9 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) DRM_ERROR("Switching back to LCPLL failed\n"); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, data); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1); @@ -928,12 +928,12 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, WARN_ON((cdclk == 24000) != (vco == 0)); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", ret); @@ -975,9 +975,9 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, POSTING_READ(CDCLK_CTL); /* inform PCU of the change */ - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); } @@ -1268,10 +1268,10 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, } /* Inform power controller of upcoming frequency change */ - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, 0x80000000); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n", @@ -1300,10 +1300,10 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; I915_WRITE(CDCLK_CTL, val); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, DIV_ROUND_UP(cdclk, 25000)); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", @@ -1518,12 +1518,12 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, u32 val, divider, pcu_ack; int ret; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", ret); @@ -1575,9 +1575,9 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, I915_WRITE(CDCLK_CTL, val); /* inform PCU of the change */ - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); } diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index ff9ecd211abb..b8315bca852b 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -74,7 +74,7 @@ #define I9XX_CSC_COEFF_1_0 \ ((7 << 12) | I9XX_CSC_COEFF_FP(CTM_COEFF_1_0, 8)) -static bool crtc_state_is_legacy(struct drm_crtc_state *state) +static bool crtc_state_is_legacy_gamma(struct drm_crtc_state *state) { return !state->degamma_lut && !state->ctm && @@ -288,7 +288,7 @@ static void cherryview_load_csc_matrix(struct drm_crtc_state *state) } mode = (state->ctm ? CGM_PIPE_MODE_CSC : 0); - if (!crtc_state_is_legacy(state)) { + if (!crtc_state_is_legacy_gamma(state)) { mode |= (state->degamma_lut ? CGM_PIPE_MODE_DEGAMMA : 0) | (state->gamma_lut ? CGM_PIPE_MODE_GAMMA : 0); } @@ -469,7 +469,7 @@ static void broadwell_load_luts(struct drm_crtc_state *state) struct intel_crtc_state *intel_state = to_intel_crtc_state(state); enum pipe pipe = to_intel_crtc(state->crtc)->pipe; - if (crtc_state_is_legacy(state)) { + if (crtc_state_is_legacy_gamma(state)) { haswell_load_luts(state); return; } @@ -529,7 +529,7 @@ static void glk_load_luts(struct drm_crtc_state *state) glk_load_degamma_lut(state); - if (crtc_state_is_legacy(state)) { + if (crtc_state_is_legacy_gamma(state)) { haswell_load_luts(state); return; } @@ -551,7 +551,7 @@ static void cherryview_load_luts(struct drm_crtc_state *state) uint32_t i, lut_size; uint32_t word0, word1; - if (crtc_state_is_legacy(state)) { + if (crtc_state_is_legacy_gamma(state)) { /* Turn off degamma/gamma on CGM block. */ I915_WRITE(CGM_PIPE_MODE(pipe), (state->ctm ? CGM_PIPE_MODE_CSC : 0)); @@ -632,12 +632,10 @@ int intel_color_check(struct drm_crtc *crtc, return 0; /* - * We also allow no degamma lut and a gamma lut at the legacy + * We also allow no degamma lut/ctm and a gamma lut at the legacy * size (256 entries). */ - if (!crtc_state->degamma_lut && - crtc_state->gamma_lut && - crtc_state->gamma_lut->length == LEGACY_LUT_LENGTH) + if (crtc_state_is_legacy_gamma(crtc_state)) return 0; return -EINVAL; diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 954070255b4d..668e8c3e791d 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -213,6 +213,19 @@ static void pch_post_disable_crt(struct intel_encoder *encoder, intel_disable_crt(encoder, old_crtc_state, old_conn_state); } +static void hsw_disable_crt(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + struct drm_crtc *crtc = old_crtc_state->base.crtc; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + + WARN_ON(!intel_crtc->config->has_pch_encoder); + + intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); +} + static void hsw_post_disable_crt(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) @@ -225,6 +238,58 @@ static void hsw_post_disable_crt(struct intel_encoder *encoder, lpt_disable_iclkip(dev_priv); intel_ddi_fdi_post_disable(encoder, old_crtc_state, old_conn_state); + + WARN_ON(!old_crtc_state->has_pch_encoder); + + intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); +} + +static void hsw_pre_pll_enable_crt(struct intel_encoder *encoder, + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) +{ + struct drm_crtc *crtc = pipe_config->base.crtc; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + + WARN_ON(!intel_crtc->config->has_pch_encoder); + + intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); +} + +static void hsw_pre_enable_crt(struct intel_encoder *encoder, + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) +{ + struct drm_crtc *crtc = pipe_config->base.crtc; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int pipe = intel_crtc->pipe; + + WARN_ON(!intel_crtc->config->has_pch_encoder); + + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); + + dev_priv->display.fdi_link_train(intel_crtc, pipe_config); +} + +static void hsw_enable_crt(struct intel_encoder *encoder, + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) +{ + struct drm_crtc *crtc = pipe_config->base.crtc; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int pipe = intel_crtc->pipe; + + WARN_ON(!intel_crtc->config->has_pch_encoder); + + intel_crt_set_dpms(encoder, pipe_config, DRM_MODE_DPMS_ON); + + intel_wait_for_vblank(dev_priv, pipe); + intel_wait_for_vblank(dev_priv, pipe); + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); + intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); } static void intel_enable_crt(struct intel_encoder *encoder, @@ -890,26 +955,31 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->base.power_domain = POWER_DOMAIN_PORT_CRT; - crt->base.compute_config = intel_crt_compute_config; - if (HAS_PCH_SPLIT(dev_priv)) { - crt->base.disable = pch_disable_crt; - crt->base.post_disable = pch_post_disable_crt; - } else { - crt->base.disable = intel_disable_crt; - } - crt->base.enable = intel_enable_crt; if (I915_HAS_HOTPLUG(dev_priv) && !dmi_check_system(intel_spurious_crt_detect)) crt->base.hpd_pin = HPD_CRT; + + crt->base.compute_config = intel_crt_compute_config; if (HAS_DDI(dev_priv)) { crt->base.port = PORT_E; crt->base.get_config = hsw_crt_get_config; crt->base.get_hw_state = intel_ddi_get_hw_state; + crt->base.pre_pll_enable = hsw_pre_pll_enable_crt; + crt->base.pre_enable = hsw_pre_enable_crt; + crt->base.enable = hsw_enable_crt; + crt->base.disable = hsw_disable_crt; crt->base.post_disable = hsw_post_disable_crt; } else { + if (HAS_PCH_SPLIT(dev_priv)) { + crt->base.disable = pch_disable_crt; + crt->base.post_disable = pch_post_disable_crt; + } else { + crt->base.disable = intel_disable_crt; + } crt->base.port = PORT_NONE; crt->base.get_config = intel_crt_get_config; crt->base.get_hw_state = intel_crt_get_hw_state; + crt->base.enable = intel_enable_crt; } intel_connector->get_hw_state = intel_connector_get_hw_state; diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index cdfb624eb82d..ea5d5c9645a4 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -216,7 +216,7 @@ static void gen9_set_dc_state_debugmask(struct drm_i915_private *dev_priv) mask = DC_STATE_DEBUG_MASK_MEMORY_UP; - if (IS_BROXTON(dev_priv)) + if (IS_GEN9_LP(dev_priv)) mask |= DC_STATE_DEBUG_MASK_CORES; /* The below bit doesn't need to be cleared ever afterwards */ diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 93cbbcbbc193..b307b6fe1ce3 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -602,8 +602,10 @@ cnl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries) } else if (voltage == VOLTAGE_INFO_1_05V) { *n_entries = ARRAY_SIZE(cnl_ddi_translations_hdmi_1_05V); return cnl_ddi_translations_hdmi_1_05V; - } else + } else { + *n_entries = 1; /* shut up gcc */ MISSING_CASE(voltage); + } return NULL; } @@ -621,8 +623,10 @@ cnl_get_buf_trans_dp(struct drm_i915_private *dev_priv, int *n_entries) } else if (voltage == VOLTAGE_INFO_1_05V) { *n_entries = ARRAY_SIZE(cnl_ddi_translations_dp_1_05V); return cnl_ddi_translations_dp_1_05V; - } else + } else { + *n_entries = 1; /* shut up gcc */ MISSING_CASE(voltage); + } return NULL; } @@ -641,8 +645,10 @@ cnl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) } else if (voltage == VOLTAGE_INFO_1_05V) { *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_1_05V); return cnl_ddi_translations_edp_1_05V; - } else + } else { + *n_entries = 1; /* shut up gcc */ MISSING_CASE(voltage); + } return NULL; } else { return cnl_get_buf_trans_dp(dev_priv, n_entries); @@ -1214,6 +1220,9 @@ static int cnl_calc_wrpll_link(struct drm_i915_private *dev_priv, dco_freq += (((cfgcr0 & DPLL_CFGCR0_DCO_FRACTION_MASK) >> DPLL_CFGCR0_DCO_FRACTION_SHIFT) * ref_clock) / 0x8000; + if (WARN_ON(p0 == 0 || p1 == 0 || p2 == 0)) + return 0; + return dco_freq / (p0 * p1 * p2 * 5); } @@ -1713,7 +1722,8 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, out: if (ret && IS_GEN9_LP(dev_priv)) { tmp = I915_READ(BXT_PHY_CTL(port)); - if ((tmp & (BXT_PHY_LANE_POWERDOWN_ACK | + if ((tmp & (BXT_PHY_CMNLANE_POWERDOWN_ACK | + BXT_PHY_LANE_POWERDOWN_ACK | BXT_PHY_LANE_ENABLED)) != BXT_PHY_LANE_ENABLED) DRM_ERROR("Port %c enabled but PHY powered down? " "(PHY_CTL %08x)\n", port_name(port), tmp); @@ -2161,7 +2171,8 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_prepare_dp_ddi_buffers(encoder); intel_ddi_init_dp_buf_reg(encoder); - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); + if (!link_mst) + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); intel_dp_start_link_train(intel_dp); if (port != PORT_A || INTEL_GEN(dev_priv) >= 9) intel_dp_stop_link_train(intel_dp); @@ -2205,8 +2216,16 @@ static void intel_ddi_pre_enable(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { + struct drm_crtc *crtc = pipe_config->base.crtc; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int pipe = intel_crtc->pipe; int type = encoder->type; + WARN_ON(intel_crtc->config->has_pch_encoder); + + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); + if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { intel_ddi_pre_enable_dp(encoder, pipe_config->port_clock, @@ -2235,12 +2254,21 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, uint32_t val; bool wait = false; - /* old_crtc_state and old_conn_state are NULL when called from DP_MST */ - if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { + /* + * old_crtc_state and old_conn_state are NULL when called from + * DP_MST. The main connector associated with this port is never + * bound to a crtc for MST. + */ + bool is_mst = !old_crtc_state; struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); + /* + * Power down sink before disabling the port, otherwise we end + * up getting interrupts from the sink on detecting link loss. + */ + if (!is_mst) + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); } val = I915_READ(DDI_BUF_CTL(port)); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 615c58e48613..7e91dc9a0fcf 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1539,7 +1539,7 @@ static void chv_enable_pll(struct intel_crtc *crtc, * DPLLCMD is AWOL. Use chicken bits to propagate * the value from DPLLBMD to either pipe B or C. */ - I915_WRITE(CBR4_VLV, pipe == PIPE_B ? CBR_DPLLBMD_PIPE_B : CBR_DPLLBMD_PIPE_C); + I915_WRITE(CBR4_VLV, CBR_DPLLBMD_PIPE(pipe)); I915_WRITE(DPLL_MD(PIPE_B), pipe_config->dpll_hw_state.dpll_md); I915_WRITE(CBR4_VLV, 0); dev_priv->chv_dpll_md[pipe] = pipe_config->dpll_hw_state.dpll_md; @@ -1568,11 +1568,12 @@ static int intel_num_dvo_pipes(struct drm_i915_private *dev_priv) return count; } -static void i9xx_enable_pll(struct intel_crtc *crtc) +static void i9xx_enable_pll(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); i915_reg_t reg = DPLL(crtc->pipe); - u32 dpll = crtc->config->dpll_hw_state.dpll; + u32 dpll = crtc_state->dpll_hw_state.dpll; int i; assert_pipe_disabled(dev_priv, crtc->pipe); @@ -1609,7 +1610,7 @@ static void i9xx_enable_pll(struct intel_crtc *crtc) if (INTEL_GEN(dev_priv) >= 4) { I915_WRITE(DPLL_MD(crtc->pipe), - crtc->config->dpll_hw_state.dpll_md); + crtc_state->dpll_hw_state.dpll_md); } else { /* The pixel multiplier can only be updated once the * DPLL is enabled and the clocks are stable. @@ -1627,15 +1628,6 @@ static void i9xx_enable_pll(struct intel_crtc *crtc) } } -/** - * i9xx_disable_pll - disable a PLL - * @dev_priv: i915 private structure - * @pipe: pipe PLL to disable - * - * Disable the PLL for @pipe, making sure the pipe is off first. - * - * Note! This is for pre-ILK only. - */ static void i9xx_disable_pll(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -2219,8 +2211,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) * something and try to run the system in a "less than optimal" * mode that matches the user configuration. */ - if (i915_vma_get_fence(vma) == 0) - i915_vma_pin_fence(vma); + i915_vma_pin_fence(vma); } i915_vma_get(vma); @@ -4955,10 +4946,10 @@ void hsw_enable_ips(struct intel_crtc *crtc) assert_plane_enabled(dev_priv, crtc->plane); if (IS_BROADWELL(dev_priv)) { - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, IPS_ENABLE | IPS_PCODE_CONTROL)); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); /* Quoting Art Runyan: "its not safe to expect any particular * value in IPS_CTL bit 31 after enabling IPS through the * mailbox." Moreover, the mailbox may return a bogus state, @@ -4988,9 +4979,9 @@ void hsw_disable_ips(struct intel_crtc *crtc) assert_plane_enabled(dev_priv, crtc->plane); if (IS_BROADWELL(dev_priv)) { - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0)); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); /* wait for pcode to finish disabling IPS, which may take up to 42ms */ if (intel_wait_for_register(dev_priv, IPS_CTL, IPS_ENABLE, 0, @@ -5459,6 +5450,20 @@ static bool hsw_crtc_supports_ips(struct intel_crtc *crtc) return HAS_IPS(to_i915(crtc->base.dev)) && crtc->pipe == PIPE_A; } +static void glk_pipe_scaler_clock_gating_wa(struct drm_i915_private *dev_priv, + enum pipe pipe, bool apply) +{ + u32 val = I915_READ(CLKGATE_DIS_PSL(pipe)); + u32 mask = DPF_GATING_DIS | DPF_RAM_GATING_DIS | DPFR_GATING_DIS; + + if (apply) + val |= mask; + else + val &= ~mask; + + I915_WRITE(CLKGATE_DIS_PSL(pipe), val); +} + static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, struct drm_atomic_state *old_state) { @@ -5469,13 +5474,11 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; struct intel_atomic_state *old_intel_state = to_intel_atomic_state(old_state); + bool psl_clkgate_wa; if (WARN_ON(intel_crtc->active)) return; - if (intel_crtc->config->has_pch_encoder) - intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); - intel_encoders_pre_pll_enable(crtc, pipe_config, old_state); if (intel_crtc->config->shared_dpll) @@ -5509,19 +5512,17 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_crtc->active = true; - if (intel_crtc->config->has_pch_encoder) - intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); - else - intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); - intel_encoders_pre_enable(crtc, pipe_config, old_state); - if (intel_crtc->config->has_pch_encoder) - dev_priv->display.fdi_link_train(intel_crtc, pipe_config); - if (!transcoder_is_dsi(cpu_transcoder)) intel_ddi_enable_pipe_clock(pipe_config); + /* Display WA #1180: WaDisableScalarClockGating: glk, cnl */ + psl_clkgate_wa = (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) && + intel_crtc->config->pch_pfit.enabled; + if (psl_clkgate_wa) + glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, true); + if (INTEL_GEN(dev_priv) >= 9) skylake_pfit_enable(intel_crtc); else @@ -5555,11 +5556,9 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_encoders_enable(crtc, pipe_config, old_state); - if (intel_crtc->config->has_pch_encoder) { + if (psl_clkgate_wa) { intel_wait_for_vblank(dev_priv, pipe); - intel_wait_for_vblank(dev_priv, pipe); - intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); - intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); + glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, false); } /* If we change the relative order between pipe/planes enabling, we need @@ -5655,9 +5654,6 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; - if (intel_crtc->config->has_pch_encoder) - intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); - intel_encoders_disable(crtc, old_crtc_state, old_state); drm_crtc_vblank_off(crtc); @@ -5682,9 +5678,6 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, intel_ddi_disable_pipe_clock(intel_crtc->config); intel_encoders_post_disable(crtc, old_crtc_state, old_state); - - if (old_crtc_state->has_pch_encoder) - intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); } static void i9xx_pfit_enable(struct intel_crtc *crtc) @@ -5894,7 +5887,7 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config, intel_encoders_pre_enable(crtc, pipe_config, old_state); - i9xx_enable_pll(intel_crtc); + i9xx_enable_pll(intel_crtc, pipe_config); i9xx_pfit_enable(intel_crtc); @@ -8846,11 +8839,11 @@ static uint32_t hsw_read_dcomp(struct drm_i915_private *dev_priv) static void hsw_write_dcomp(struct drm_i915_private *dev_priv, uint32_t val) { if (IS_HASWELL(dev_priv)) { - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); if (sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_D_COMP, val)) DRM_DEBUG_KMS("Failed to write to D_COMP\n"); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } else { I915_WRITE(D_COMP_BDW, val); POSTING_READ(D_COMP_BDW); @@ -10245,58 +10238,44 @@ static void ironlake_pch_clock_get(struct intel_crtc *crtc, &pipe_config->fdi_m_n); } -/** Returns the currently programmed mode of the given pipe. */ -struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, - struct drm_crtc *crtc) +/* Returns the currently programmed mode of the given encoder. */ +struct drm_display_mode * +intel_encoder_current_mode(struct intel_encoder *encoder) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc_state *crtc_state; struct drm_display_mode *mode; - struct intel_crtc_state *pipe_config; - int htot = I915_READ(HTOTAL(cpu_transcoder)); - int hsync = I915_READ(HSYNC(cpu_transcoder)); - int vtot = I915_READ(VTOTAL(cpu_transcoder)); - int vsync = I915_READ(VSYNC(cpu_transcoder)); - enum pipe pipe = intel_crtc->pipe; + struct intel_crtc *crtc; + enum pipe pipe; + + if (!encoder->get_hw_state(encoder, &pipe)) + return NULL; + + crtc = intel_get_crtc_for_pipe(dev_priv, pipe); mode = kzalloc(sizeof(*mode), GFP_KERNEL); if (!mode) return NULL; - pipe_config = kzalloc(sizeof(*pipe_config), GFP_KERNEL); - if (!pipe_config) { + crtc_state = kzalloc(sizeof(*crtc_state), GFP_KERNEL); + if (!crtc_state) { kfree(mode); return NULL; } - /* - * Construct a pipe_config sufficient for getting the clock info - * back out of crtc_clock_get. - * - * Note, if LVDS ever uses a non-1 pixel multiplier, we'll need - * to use a real value here instead. - */ - pipe_config->cpu_transcoder = (enum transcoder) pipe; - pipe_config->pixel_multiplier = 1; - pipe_config->dpll_hw_state.dpll = I915_READ(DPLL(pipe)); - pipe_config->dpll_hw_state.fp0 = I915_READ(FP0(pipe)); - pipe_config->dpll_hw_state.fp1 = I915_READ(FP1(pipe)); - i9xx_crtc_clock_get(intel_crtc, pipe_config); + crtc_state->base.crtc = &crtc->base; - mode->clock = pipe_config->port_clock / pipe_config->pixel_multiplier; - mode->hdisplay = (htot & 0xffff) + 1; - mode->htotal = ((htot & 0xffff0000) >> 16) + 1; - mode->hsync_start = (hsync & 0xffff) + 1; - mode->hsync_end = ((hsync & 0xffff0000) >> 16) + 1; - mode->vdisplay = (vtot & 0xffff) + 1; - mode->vtotal = ((vtot & 0xffff0000) >> 16) + 1; - mode->vsync_start = (vsync & 0xffff) + 1; - mode->vsync_end = ((vsync & 0xffff0000) >> 16) + 1; + if (!dev_priv->display.get_pipe_config(crtc, crtc_state)) { + kfree(crtc_state); + kfree(mode); + return NULL; + } - drm_mode_set_name(mode); + encoder->get_config(encoder, crtc_state); - kfree(pipe_config); + intel_mode_from_pipe_config(mode, crtc_state); + + kfree(crtc_state); return mode; } @@ -11336,6 +11315,18 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, PIPE_CONF_CHECK_X(dpll_hw_state.ctrl1); PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr1); PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr2); + PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr0); + PIPE_CONF_CHECK_X(dpll_hw_state.ebb0); + PIPE_CONF_CHECK_X(dpll_hw_state.ebb4); + PIPE_CONF_CHECK_X(dpll_hw_state.pll0); + PIPE_CONF_CHECK_X(dpll_hw_state.pll1); + PIPE_CONF_CHECK_X(dpll_hw_state.pll2); + PIPE_CONF_CHECK_X(dpll_hw_state.pll3); + PIPE_CONF_CHECK_X(dpll_hw_state.pll6); + PIPE_CONF_CHECK_X(dpll_hw_state.pll8); + PIPE_CONF_CHECK_X(dpll_hw_state.pll9); + PIPE_CONF_CHECK_X(dpll_hw_state.pll10); + PIPE_CONF_CHECK_X(dpll_hw_state.pcsdw12); PIPE_CONF_CHECK_X(dsi_pll.ctrl); PIPE_CONF_CHECK_X(dsi_pll.div); @@ -12220,7 +12211,10 @@ static void skl_update_crtcs(struct drm_atomic_state *state) if (updated & cmask || !cstate->base.active) continue; - if (skl_ddb_allocation_overlaps(entries, &cstate->wm.skl.ddb, i)) + if (skl_ddb_allocation_overlaps(dev_priv, + entries, + &cstate->wm.skl.ddb, + i)) continue; updated |= cmask; @@ -12515,21 +12509,10 @@ static int intel_atomic_commit(struct drm_device *dev, struct drm_i915_private *dev_priv = to_i915(dev); int ret = 0; - ret = drm_atomic_helper_setup_commit(state, nonblock); - if (ret) - return ret; - drm_atomic_state_get(state); i915_sw_fence_init(&intel_state->commit_ready, intel_atomic_commit_ready); - ret = intel_atomic_prepare_commit(dev, state); - if (ret) { - DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret); - i915_sw_fence_commit(&intel_state->commit_ready); - return ret; - } - /* * The intel_legacy_cursor_update() fast path takes care * of avoiding the vblank waits for simple cursor @@ -12538,19 +12521,37 @@ static int intel_atomic_commit(struct drm_device *dev, * updates happen during the correct frames. Gen9+ have * double buffered watermarks and so shouldn't need this. * - * Do this after drm_atomic_helper_setup_commit() and - * intel_atomic_prepare_commit() because we still want - * to skip the flip and fb cleanup waits. Although that - * does risk yanking the mapping from under the display - * engine. + * Unset state->legacy_cursor_update before the call to + * drm_atomic_helper_setup_commit() because otherwise + * drm_atomic_helper_wait_for_flip_done() is a noop and + * we get FIFO underruns because we didn't wait + * for vblank. * * FIXME doing watermarks and fb cleanup from a vblank worker * (assuming we had any) would solve these problems. */ - if (INTEL_GEN(dev_priv) < 9) - state->legacy_cursor_update = false; + if (INTEL_GEN(dev_priv) < 9 && state->legacy_cursor_update) { + struct intel_crtc_state *new_crtc_state; + struct intel_crtc *crtc; + int i; + + for_each_new_intel_crtc_in_state(intel_state, crtc, new_crtc_state, i) + if (new_crtc_state->wm.need_postvbl_update || + new_crtc_state->update_wm_post) + state->legacy_cursor_update = false; + } + + ret = intel_atomic_prepare_commit(dev, state); + if (ret) { + DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret); + i915_sw_fence_commit(&intel_state->commit_ready); + return ret; + } + + ret = drm_atomic_helper_setup_commit(state, nonblock); + if (!ret) + ret = drm_atomic_helper_swap_state(state, true); - ret = drm_atomic_helper_swap_state(state, true); if (ret) { i915_sw_fence_commit(&intel_state->commit_ready); @@ -14748,10 +14749,10 @@ static struct intel_connector *intel_encoder_find_connector(struct intel_encoder } static bool has_pch_trancoder(struct drm_i915_private *dev_priv, - enum transcoder pch_transcoder) + enum pipe pch_transcoder) { return HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) || - (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A); + (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == PIPE_A); } static void intel_sanitize_crtc(struct intel_crtc *crtc, @@ -14834,7 +14835,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, * PCH transcoders B and C would prevent enabling the south * error interrupt (see cpt_can_enable_serr_int()). */ - if (has_pch_trancoder(dev_priv, (enum transcoder)crtc->pipe)) + if (has_pch_trancoder(dev_priv, crtc->pipe)) crtc->pch_fifo_underrun_disabled = true; } } diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 90e756c76f10..b0f446b68f42 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -137,32 +137,20 @@ static void vlv_steal_power_sequencer(struct drm_device *dev, enum pipe pipe); static void intel_dp_unset_edid(struct intel_dp *intel_dp); -static int intel_dp_num_rates(u8 link_bw_code) -{ - switch (link_bw_code) { - default: - WARN(1, "invalid max DP link bw val %x, using 1.62Gbps\n", - link_bw_code); - case DP_LINK_BW_1_62: - return 1; - case DP_LINK_BW_2_7: - return 2; - case DP_LINK_BW_5_4: - return 3; - } -} - /* update sink rates from dpcd */ static void intel_dp_set_sink_rates(struct intel_dp *intel_dp) { - int i, num_rates; + int i, max_rate; - num_rates = intel_dp_num_rates(intel_dp->dpcd[DP_MAX_LINK_RATE]); + max_rate = drm_dp_bw_code_to_link_rate(intel_dp->dpcd[DP_MAX_LINK_RATE]); - for (i = 0; i < num_rates; i++) + for (i = 0; i < ARRAY_SIZE(default_rates); i++) { + if (default_rates[i] > max_rate) + break; intel_dp->sink_rates[i] = default_rates[i]; + } - intel_dp->num_sink_rates = num_rates; + intel_dp->num_sink_rates = i; } /* Theoretical max between source and sink */ @@ -254,15 +242,15 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) } else if (IS_GEN9_BC(dev_priv)) { source_rates = skl_rates; size = ARRAY_SIZE(skl_rates); - } else { + } else if ((IS_HASWELL(dev_priv) && !IS_HSW_ULX(dev_priv)) || + IS_BROADWELL(dev_priv)) { source_rates = default_rates; size = ARRAY_SIZE(default_rates); + } else { + source_rates = default_rates; + size = ARRAY_SIZE(default_rates) - 1; } - /* This depends on the fact that 5.4 is last value in the array */ - if (!intel_dp_source_supports_hbr2(intel_dp)) - size--; - intel_dp->source_rates = source_rates; intel_dp->num_source_rates = size; } @@ -1482,14 +1470,9 @@ intel_dp_aux_init(struct intel_dp *intel_dp) bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp) { - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); + int max_rate = intel_dp->source_rates[intel_dp->num_source_rates - 1]; - if ((IS_HASWELL(dev_priv) && !IS_HSW_ULX(dev_priv)) || - IS_BROADWELL(dev_priv) || (INTEL_GEN(dev_priv) >= 9)) - return true; - else - return false; + return max_rate >= 540000; } static void @@ -2308,8 +2291,8 @@ static void edp_panel_off(struct intel_dp *intel_dp) I915_WRITE(pp_ctrl_reg, pp); POSTING_READ(pp_ctrl_reg); - intel_dp->panel_power_off_time = ktime_get_boottime(); wait_panel_off(intel_dp); + intel_dp->panel_power_off_time = ktime_get_boottime(); /* We got a reference when we enabled the VDD. */ intel_display_power_put(dev_priv, intel_dp->aux_power_domain); @@ -5286,7 +5269,7 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev, * seems sufficient to avoid this problem. */ if (dev_priv->quirks & QUIRK_INCREASE_T12_DELAY) { - vbt.t11_t12 = max_t(u16, vbt.t11_t12, 900 * 10); + vbt.t11_t12 = max_t(u16, vbt.t11_t12, 1300 * 10); DRM_DEBUG_KMS("Increasing T12 panel delay as per the quirk to %d\n", vbt.t11_t12); } diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 9a396f483f8b..3c131e2544cf 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -162,14 +162,19 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder, drm_dp_mst_deallocate_vcpi(&intel_dp->mst_mgr, connector->port); + /* + * Power down mst path before disabling the port, otherwise we end + * up getting interrupts from the sink upon detecting link loss. + */ + drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, + false); + intel_dp->active_mst_links--; intel_mst->connector = NULL; if (intel_dp->active_mst_links == 0) { intel_dig_port->base.post_disable(&intel_dig_port->base, NULL, NULL); - - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); } DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); } @@ -196,6 +201,7 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); + drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, true); if (intel_dp->active_mst_links == 0) intel_dig_port->base.pre_enable(&intel_dig_port->base, pipe_config, NULL); diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index 09b670929786..de38d014ed39 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -208,12 +208,6 @@ static const struct bxt_ddi_phy_info glk_ddi_phy_info[] = { }, }; -static u32 bxt_phy_port_mask(const struct bxt_ddi_phy_info *phy_info) -{ - return (phy_info->dual_channel * BIT(phy_info->channel[DPIO_CH1].port)) | - BIT(phy_info->channel[DPIO_CH0].port); -} - static const struct bxt_ddi_phy_info * bxt_get_phy_list(struct drm_i915_private *dev_priv, int *count) { @@ -313,7 +307,6 @@ bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, enum dpio_phy phy) { const struct bxt_ddi_phy_info *phy_info; - enum port port; phy_info = bxt_get_phy_info(dev_priv, phy); @@ -335,19 +328,6 @@ bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, return false; } - for_each_port_masked(port, bxt_phy_port_mask(phy_info)) { - u32 tmp = I915_READ(BXT_PHY_CTL(port)); - - if (tmp & BXT_PHY_CMNLANE_POWERDOWN_ACK) { - DRM_DEBUG_DRIVER("DDI PHY %d powered, but common lane " - "for port %c powered down " - "(PHY_CTL %08x)\n", - phy, port_name(port), tmp); - - return false; - } - } - return true; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 0cab667fff57..b87946dcc53f 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1243,7 +1243,7 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv); static inline u32 gen6_sanitize_rps_pm_mask(const struct drm_i915_private *i915, u32 mask) { - return mask & ~i915->rps.pm_intrmsk_mbz; + return mask & ~i915->gt_pm.rps.pm_intrmsk_mbz; } void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv); @@ -1254,7 +1254,7 @@ static inline bool intel_irqs_enabled(struct drm_i915_private *dev_priv) * We only use drm_irq_uninstall() at unload and VT switch, so * this is the only thing we need to check. */ - return dev_priv->pm.irqs_enabled; + return dev_priv->runtime_pm.irqs_enabled; } int intel_get_crtc_scanline(struct intel_crtc *crtc); @@ -1363,8 +1363,9 @@ struct intel_connector *intel_connector_alloc(void); bool intel_connector_get_hw_state(struct intel_connector *connector); void intel_connector_attach_encoder(struct intel_connector *connector, struct intel_encoder *encoder); -struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, - struct drm_crtc *crtc); +struct drm_display_mode * +intel_encoder_current_mode(struct intel_encoder *encoder); + enum pipe intel_get_pipe_from_connector(struct intel_connector *connector); int intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data, struct drm_file *file_priv); @@ -1790,7 +1791,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, static inline void assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv) { - WARN_ONCE(dev_priv->pm.suspended, + WARN_ONCE(dev_priv->runtime_pm.suspended, "Device suspended during HW access\n"); } @@ -1798,7 +1799,7 @@ static inline void assert_rpm_wakelock_held(struct drm_i915_private *dev_priv) { assert_rpm_device_not_suspended(dev_priv); - WARN_ONCE(!atomic_read(&dev_priv->pm.wakeref_count), + WARN_ONCE(!atomic_read(&dev_priv->runtime_pm.wakeref_count), "RPM wakelock ref not held during HW access"); } @@ -1823,7 +1824,7 @@ assert_rpm_wakelock_held(struct drm_i915_private *dev_priv) static inline void disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) { - atomic_inc(&dev_priv->pm.wakeref_count); + atomic_inc(&dev_priv->runtime_pm.wakeref_count); } /** @@ -1840,7 +1841,7 @@ disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) static inline void enable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) { - atomic_dec(&dev_priv->pm.wakeref_count); + atomic_dec(&dev_priv->runtime_pm.wakeref_count); } void intel_runtime_pm_get(struct drm_i915_private *dev_priv); @@ -1893,7 +1894,8 @@ int intel_enable_sagv(struct drm_i915_private *dev_priv); int intel_disable_sagv(struct drm_i915_private *dev_priv); bool skl_wm_level_equals(const struct skl_wm_level *l1, const struct skl_wm_level *l2); -bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries, +bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv, + const struct skl_ddb_entry **entries, const struct skl_ddb_entry *ddb, int ignore); bool ilk_disable_lp_wm(struct drm_device *dev); @@ -1902,7 +1904,7 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, struct intel_crtc_state *cstate); void intel_init_ipc(struct drm_i915_private *dev_priv); void intel_enable_ipc(struct drm_i915_private *dev_priv); -static inline int intel_enable_rc6(void) +static inline int intel_rc6_enabled(void) { return i915_modparams.enable_rc6; } diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 20a7b004ffd7..66bbedc5fa01 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -790,14 +790,19 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct drm_crtc *crtc = pipe_config->base.crtc; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int pipe = intel_crtc->pipe; enum port port; u32 val; bool glk_cold_boot = false; DRM_DEBUG_KMS("\n"); + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); + /* * The BIOS may leave the PLL in a wonky state where it doesn't * lock. It needs to be fully powered down to fix it. diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index 5c562e1f56ae..53c9b763f4ce 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -379,32 +379,15 @@ static const struct drm_encoder_funcs intel_dvo_enc_funcs = { * chip being on DVOB/C and having multiple pipes. */ static struct drm_display_mode * -intel_dvo_get_current_mode(struct drm_connector *connector) +intel_dvo_get_current_mode(struct intel_encoder *encoder) { - struct drm_device *dev = connector->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_dvo *intel_dvo = intel_attached_dvo(connector); - uint32_t dvo_val = I915_READ(intel_dvo->dev.dvo_reg); - struct drm_display_mode *mode = NULL; + struct drm_display_mode *mode; - /* If the DVO port is active, that'll be the LVDS, so we can pull out - * its timings to get how the BIOS set up the panel. - */ - if (dvo_val & DVO_ENABLE) { - struct intel_crtc *crtc; - int pipe = (dvo_val & DVO_PIPE_B_SELECT) ? 1 : 0; - - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - if (crtc) { - mode = intel_crtc_mode_get(dev, &crtc->base); - if (mode) { - mode->type |= DRM_MODE_TYPE_PREFERRED; - if (dvo_val & DVO_HSYNC_ACTIVE_HIGH) - mode->flags |= DRM_MODE_FLAG_PHSYNC; - if (dvo_val & DVO_VSYNC_ACTIVE_HIGH) - mode->flags |= DRM_MODE_FLAG_PVSYNC; - } - } + mode = intel_encoder_current_mode(encoder); + if (mode) { + DRM_DEBUG_KMS("using current (BIOS) mode: "); + drm_mode_debug_printmodeline(mode); + mode->type |= DRM_MODE_TYPE_PREFERRED; } return mode; @@ -551,7 +534,7 @@ void intel_dvo_init(struct drm_i915_private *dev_priv) * mode being output through DVO. */ intel_panel_init(&intel_connector->panel, - intel_dvo_get_current_mode(connector), + intel_dvo_get_current_mode(intel_encoder), NULL, NULL); intel_dvo->panel_wants_dither = true; } diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index a28e2a864cf1..a59b2a30ff5a 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -22,6 +22,8 @@ * */ +#include + #include "i915_drv.h" #include "intel_ringbuffer.h" #include "intel_lrc.h" @@ -39,7 +41,7 @@ #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) -#define GEN10_LR_CONTEXT_RENDER_SIZE (19 * PAGE_SIZE) +#define GEN10_LR_CONTEXT_RENDER_SIZE (18 * PAGE_SIZE) #define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) @@ -613,9 +615,22 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (IS_ERR(ring)) return PTR_ERR(ring); + /* + * Similarly the preempt context must always be available so that + * we can interrupt the engine at any time. + */ + if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) { + ring = engine->context_pin(engine, + engine->i915->preempt_context); + if (IS_ERR(ring)) { + ret = PTR_ERR(ring); + goto err_unpin_kernel; + } + } + ret = intel_engine_init_breadcrumbs(engine); if (ret) - goto err_unpin; + goto err_unpin_preempt; ret = i915_gem_render_state_init(engine); if (ret) @@ -634,7 +649,10 @@ err_rs_fini: i915_gem_render_state_fini(engine); err_breadcrumbs: intel_engine_fini_breadcrumbs(engine); -err_unpin: +err_unpin_preempt: + if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) + engine->context_unpin(engine, engine->i915->preempt_context); +err_unpin_kernel: engine->context_unpin(engine, engine->i915->kernel_context); return ret; } @@ -660,6 +678,8 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) intel_engine_cleanup_cmd_parser(engine); i915_gem_batch_pool_fini(&engine->batch_pool); + if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) + engine->context_unpin(engine, engine->i915->preempt_context); engine->context_unpin(engine, engine->i915->kernel_context); } @@ -830,11 +850,6 @@ static int wa_add(struct drm_i915_private *dev_priv, #define WA_SET_FIELD_MASKED(addr, mask, value) \ WA_REG(addr, mask, _MASKED_FIELD(mask, value)) -#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask)) -#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask)) - -#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val) - static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, i915_reg_t reg) { @@ -845,8 +860,8 @@ static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) return -EINVAL; - WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), - i915_mmio_reg_offset(reg)); + I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), + i915_mmio_reg_offset(reg)); wa->hw_whitelist_count[engine->id]++; return 0; @@ -980,7 +995,11 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) GEN9_PBE_COMPRESSED_HASH_SELECTION); WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); - WA_SET_BIT(MMCD_MISC_CTRL, MMCD_PCLA | MMCD_HOTSPOT_EN); + + I915_WRITE(MMCD_MISC_CTRL, + I915_READ(MMCD_MISC_CTRL) | + MMCD_PCLA | + MMCD_HOTSPOT_EN); } /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ @@ -1071,13 +1090,33 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_FLUSH_COHERENT_LINES)); + /* + * Supporting preemption with fine-granularity requires changes in the + * batch buffer programming. Since we can't break old userspace, we + * need to set our default preemption level to safe value. Userspace is + * still able to use more fine-grained preemption levels, since in + * WaEnablePreemptionGranularityControlByUMD we're whitelisting the + * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are + * not real HW workarounds, but merely a way to start using preemption + * while maintaining old contract with userspace. + */ + + /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */ + WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); + + /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */ + WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, + GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); + /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); if (ret) return ret; - /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl */ - ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); + /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */ + I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, + _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); + ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); if (ret) return ret; @@ -1139,14 +1178,6 @@ static int skl_init_workarounds(struct intel_engine_cs *engine) if (ret) return ret; - /* - * Actual WA is to disable percontext preemption granularity control - * until D0 which is the default case so this is equivalent to - * !WaDisablePerCtxtPreemptionGranularityControl:skl - */ - I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, - _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); - /* WaEnableGapsTsvCreditFix:skl */ I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE)); @@ -1278,7 +1309,16 @@ static int cnl_init_workarounds(struct intel_engine_cs *engine) /* FtrEnableFastAnisoL1BankingFix: cnl */ WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); + /* WaDisable3DMidCmdPreemption:cnl */ + WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); + + /* WaDisableGPGPUMidCmdPreemption:cnl */ + WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, + GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); + /* WaEnablePreemptionGranularityControlByUMD:cnl */ + I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, + _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); if (ret) return ret; @@ -1578,6 +1618,164 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine) } } +static void print_request(struct drm_printer *m, + struct drm_i915_gem_request *rq, + const char *prefix) +{ + drm_printf(m, "%s%x [%x:%x] prio=%d @ %dms: %s\n", prefix, + rq->global_seqno, rq->ctx->hw_id, rq->fence.seqno, + rq->priotree.priority, + jiffies_to_msecs(jiffies - rq->emitted_jiffies), + rq->timeline->common->name); +} + +void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct i915_gpu_error *error = &engine->i915->gpu_error; + struct drm_i915_private *dev_priv = engine->i915; + struct drm_i915_gem_request *rq; + struct rb_node *rb; + u64 addr; + + drm_printf(m, "%s\n", engine->name); + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", + intel_engine_get_seqno(engine), + intel_engine_last_submit(engine), + engine->hangcheck.seqno, + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), + engine->timeline->inflight_seqnos); + drm_printf(m, "\tReset count: %d\n", + i915_reset_engine_count(error, engine)); + + rcu_read_lock(); + + drm_printf(m, "\tRequests:\n"); + + rq = list_first_entry(&engine->timeline->requests, + struct drm_i915_gem_request, link); + if (&rq->link != &engine->timeline->requests) + print_request(m, rq, "\t\tfirst "); + + rq = list_last_entry(&engine->timeline->requests, + struct drm_i915_gem_request, link); + if (&rq->link != &engine->timeline->requests) + print_request(m, rq, "\t\tlast "); + + rq = i915_gem_find_active_request(engine); + if (rq) { + print_request(m, rq, "\t\tactive "); + drm_printf(m, + "\t\t[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]\n", + rq->head, rq->postfix, rq->tail, + rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, + rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u); + } + + drm_printf(m, "\tRING_START: 0x%08x [0x%08x]\n", + I915_READ(RING_START(engine->mmio_base)), + rq ? i915_ggtt_offset(rq->ring->vma) : 0); + drm_printf(m, "\tRING_HEAD: 0x%08x [0x%08x]\n", + I915_READ(RING_HEAD(engine->mmio_base)) & HEAD_ADDR, + rq ? rq->ring->head : 0); + drm_printf(m, "\tRING_TAIL: 0x%08x [0x%08x]\n", + I915_READ(RING_TAIL(engine->mmio_base)) & TAIL_ADDR, + rq ? rq->ring->tail : 0); + drm_printf(m, "\tRING_CTL: 0x%08x [%s]\n", + I915_READ(RING_CTL(engine->mmio_base)), + I915_READ(RING_CTL(engine->mmio_base)) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? "waiting" : ""); + + rcu_read_unlock(); + + addr = intel_engine_get_active_head(engine); + drm_printf(m, "\tACTHD: 0x%08x_%08x\n", + upper_32_bits(addr), lower_32_bits(addr)); + addr = intel_engine_get_last_batch_head(engine); + drm_printf(m, "\tBBADDR: 0x%08x_%08x\n", + upper_32_bits(addr), lower_32_bits(addr)); + + if (i915_modparams.enable_execlists) { + const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; + struct intel_engine_execlists * const execlists = &engine->execlists; + u32 ptr, read, write; + unsigned int idx; + + drm_printf(m, "\tExeclist status: 0x%08x %08x\n", + I915_READ(RING_EXECLIST_STATUS_LO(engine)), + I915_READ(RING_EXECLIST_STATUS_HI(engine))); + + ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine)); + read = GEN8_CSB_READ_PTR(ptr); + write = GEN8_CSB_WRITE_PTR(ptr); + drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s\n", + read, execlists->csb_head, + write, + intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)), + yesno(test_bit(ENGINE_IRQ_EXECLIST, + &engine->irq_posted))); + if (read >= GEN8_CSB_ENTRIES) + read = 0; + if (write >= GEN8_CSB_ENTRIES) + write = 0; + if (read > write) + write += GEN8_CSB_ENTRIES; + while (read < write) { + idx = ++read % GEN8_CSB_ENTRIES; + drm_printf(m, "\tExeclist CSB[%d]: 0x%08x [0x%08x in hwsp], context: %d [%d in hwsp]\n", + idx, + I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)), + hws[idx * 2], + I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx)), + hws[idx * 2 + 1]); + } + + rcu_read_lock(); + for (idx = 0; idx < execlists_num_ports(execlists); idx++) { + unsigned int count; + + rq = port_unpack(&execlists->port[idx], &count); + if (rq) { + drm_printf(m, "\t\tELSP[%d] count=%d, ", + idx, count); + print_request(m, rq, "rq: "); + } else { + drm_printf(m, "\t\tELSP[%d] idle\n", + idx); + } + } + rcu_read_unlock(); + + spin_lock_irq(&engine->timeline->lock); + for (rb = execlists->first; rb; rb = rb_next(rb)) { + struct i915_priolist *p = + rb_entry(rb, typeof(*p), node); + + list_for_each_entry(rq, &p->requests, + priotree.link) + print_request(m, rq, "\t\tQ "); + } + spin_unlock_irq(&engine->timeline->lock); + } else if (INTEL_GEN(dev_priv) > 6) { + drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", + I915_READ(RING_PP_DIR_BASE(engine))); + drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n", + I915_READ(RING_PP_DIR_BASE_READ(engine))); + drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n", + I915_READ(RING_PP_DIR_DCLV(engine))); + } + + spin_lock_irq(&b->rb_lock); + for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { + struct intel_wait *w = rb_entry(rb, typeof(*w), node); + + drm_printf(m, "\t%s [%d] waiting for %x\n", + w->tsk->comm, w->tsk->pid, w->seqno); + } + spin_unlock_irq(&b->rb_lock); + + drm_printf(m, "\n"); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_engine.c" #endif diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c new file mode 100644 index 000000000000..9e18c4fb9909 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -0,0 +1,265 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "intel_guc.h" +#include "i915_drv.h" + +static void gen8_guc_raise_irq(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); +} + +static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i) +{ + GEM_BUG_ON(!guc->send_regs.base); + GEM_BUG_ON(!guc->send_regs.count); + GEM_BUG_ON(i >= guc->send_regs.count); + + return _MMIO(guc->send_regs.base + 4 * i); +} + +void intel_guc_init_send_regs(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + enum forcewake_domains fw_domains = 0; + unsigned int i; + + guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0)); + guc->send_regs.count = SOFT_SCRATCH_COUNT - 1; + + for (i = 0; i < guc->send_regs.count; i++) { + fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, + guc_send_reg(guc, i), + FW_REG_READ | FW_REG_WRITE); + } + guc->send_regs.fw_domains = fw_domains; +} + +void intel_guc_init_early(struct intel_guc *guc) +{ + intel_guc_ct_init_early(&guc->ct); + + mutex_init(&guc->send_mutex); + guc->send = intel_guc_send_nop; + guc->notify = gen8_guc_raise_irq; +} + +int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len) +{ + WARN(1, "Unexpected send: action=%#x\n", *action); + return -ENODEV; +} + +/* + * This function implements the MMIO based host to GuC interface. + */ +int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + u32 status; + int i; + int ret; + + GEM_BUG_ON(!len); + GEM_BUG_ON(len > guc->send_regs.count); + + /* If CT is available, we expect to use MMIO only during init/fini */ + GEM_BUG_ON(HAS_GUC_CT(dev_priv) && + *action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER && + *action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER); + + mutex_lock(&guc->send_mutex); + intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains); + + for (i = 0; i < len; i++) + I915_WRITE(guc_send_reg(guc, i), action[i]); + + POSTING_READ(guc_send_reg(guc, i - 1)); + + intel_guc_notify(guc); + + /* + * No GuC command should ever take longer than 10ms. + * Fast commands should still complete in 10us. + */ + ret = __intel_wait_for_register_fw(dev_priv, + guc_send_reg(guc, 0), + INTEL_GUC_RECV_MASK, + INTEL_GUC_RECV_MASK, + 10, 10, &status); + if (status != INTEL_GUC_STATUS_SUCCESS) { + /* + * Either the GuC explicitly returned an error (which + * we convert to -EIO here) or no response at all was + * received within the timeout limit (-ETIMEDOUT) + */ + if (ret != -ETIMEDOUT) + ret = -EIO; + + DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;" + " ret=%d status=0x%08X response=0x%08X\n", + action[0], ret, status, I915_READ(SOFT_SCRATCH(15))); + } + + intel_uncore_forcewake_put(dev_priv, guc->send_regs.fw_domains); + mutex_unlock(&guc->send_mutex); + + return ret; +} + +int intel_guc_sample_forcewake(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + u32 action[2]; + + action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE; + /* WaRsDisableCoarsePowerGating:skl,bxt */ + if (!intel_rc6_enabled() || + NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) + action[1] = 0; + else + /* bit 0 and 1 are for Render and Media domain separately */ + action[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +/** + * intel_guc_auth_huc() - Send action to GuC to authenticate HuC ucode + * @guc: intel_guc structure + * @rsa_offset: rsa offset w.r.t ggtt base of huc vma + * + * Triggers a HuC firmware authentication request to the GuC via intel_guc_send + * INTEL_GUC_ACTION_AUTHENTICATE_HUC interface. This function is invoked by + * intel_huc_auth(). + * + * Return: non-zero code on error + */ +int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset) +{ + u32 action[] = { + INTEL_GUC_ACTION_AUTHENTICATE_HUC, + rsa_offset + }; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +/** + * intel_guc_suspend() - notify GuC entering suspend state + * @dev_priv: i915 device private + */ +int intel_guc_suspend(struct drm_i915_private *dev_priv) +{ + struct intel_guc *guc = &dev_priv->guc; + struct i915_gem_context *ctx; + u32 data[3]; + + if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) + return 0; + + gen9_disable_guc_interrupts(dev_priv); + + ctx = dev_priv->kernel_context; + + data[0] = INTEL_GUC_ACTION_ENTER_S_STATE; + /* any value greater than GUC_POWER_D0 */ + data[1] = GUC_POWER_D1; + /* first page is shared data with GuC */ + data[2] = guc_ggtt_offset(ctx->engine[RCS].state) + + LRC_GUCSHR_PN * PAGE_SIZE; + + return intel_guc_send(guc, data, ARRAY_SIZE(data)); +} + +/** + * intel_guc_resume() - notify GuC resuming from suspend state + * @dev_priv: i915 device private + */ +int intel_guc_resume(struct drm_i915_private *dev_priv) +{ + struct intel_guc *guc = &dev_priv->guc; + struct i915_gem_context *ctx; + u32 data[3]; + + if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) + return 0; + + if (i915_modparams.guc_log_level >= 0) + gen9_enable_guc_interrupts(dev_priv); + + ctx = dev_priv->kernel_context; + + data[0] = INTEL_GUC_ACTION_EXIT_S_STATE; + data[1] = GUC_POWER_D0; + /* first page is shared data with GuC */ + data[2] = guc_ggtt_offset(ctx->engine[RCS].state) + + LRC_GUCSHR_PN * PAGE_SIZE; + + return intel_guc_send(guc, data, ARRAY_SIZE(data)); +} + +/** + * intel_guc_allocate_vma() - Allocate a GGTT VMA for GuC usage + * @guc: the guc + * @size: size of area to allocate (both virtual space and memory) + * + * This is a wrapper to create an object for use with the GuC. In order to + * use it inside the GuC, an object needs to be pinned lifetime, so we allocate + * both some backing storage and a range inside the Global GTT. We must pin + * it in the GGTT somewhere other than than [0, GUC_WOPCM_TOP) because that + * range is reserved inside GuC. + * + * Return: A i915_vma if successful, otherwise an ERR_PTR. + */ +struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int ret; + + obj = i915_gem_object_create(dev_priv, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); + if (IS_ERR(vma)) + goto err; + + ret = i915_vma_pin(vma, 0, PAGE_SIZE, + PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + if (ret) { + vma = ERR_PTR(ret); + goto err; + } + + return vma; + +err: + i915_gem_object_put(obj); + return vma; +} diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h new file mode 100644 index 000000000000..aa9a7b55be6e --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -0,0 +1,110 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_GUC_H_ +#define _INTEL_GUC_H_ + +#include "intel_uncore.h" +#include "intel_guc_fwif.h" +#include "intel_guc_ct.h" +#include "intel_guc_log.h" +#include "intel_uc_fw.h" +#include "i915_guc_reg.h" +#include "i915_vma.h" + +struct intel_guc { + struct intel_uc_fw fw; + struct intel_guc_log log; + struct intel_guc_ct ct; + + /* Log snapshot if GuC errors during load */ + struct drm_i915_gem_object *load_err_log; + + /* intel_guc_recv interrupt related state */ + bool interrupts_enabled; + + struct i915_vma *ads_vma; + struct i915_vma *stage_desc_pool; + void *stage_desc_pool_vaddr; + struct ida stage_ids; + + struct i915_guc_client *execbuf_client; + + DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); + /* Cyclic counter mod pagesize */ + u32 db_cacheline; + + /* GuC's FW specific registers used in MMIO send */ + struct { + u32 base; + unsigned int count; + enum forcewake_domains fw_domains; + } send_regs; + + /* To serialize the intel_guc_send actions */ + struct mutex send_mutex; + + /* GuC's FW specific send function */ + int (*send)(struct intel_guc *guc, const u32 *data, u32 len); + + /* GuC's FW specific notify function */ + void (*notify)(struct intel_guc *guc); +}; + +static +inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) +{ + return guc->send(guc, action, len); +} + +static inline void intel_guc_notify(struct intel_guc *guc) +{ + guc->notify(guc); +} + +static inline u32 guc_ggtt_offset(struct i915_vma *vma) +{ + u32 offset = i915_ggtt_offset(vma); + + GEM_BUG_ON(offset < GUC_WOPCM_TOP); + GEM_BUG_ON(range_overflows_t(u64, offset, vma->size, GUC_GGTT_TOP)); + + return offset; +} + +void intel_guc_init_early(struct intel_guc *guc); +void intel_guc_init_send_regs(struct intel_guc *guc); +int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); +int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); +int intel_guc_sample_forcewake(struct intel_guc *guc); +int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset); +int intel_guc_suspend(struct drm_i915_private *dev_priv); +int intel_guc_resume(struct drm_i915_private *dev_priv); +struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); + +int intel_guc_select_fw(struct intel_guc *guc); +int intel_guc_init_hw(struct intel_guc *guc); +u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index 7eb6b4fa1d6f..1c0a2a3de121 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -178,49 +178,49 @@ */ struct uc_css_header { - uint32_t module_type; + u32 module_type; /* header_size includes all non-uCode bits, including css_header, rsa * key, modulus key and exponent data. */ - uint32_t header_size_dw; - uint32_t header_version; - uint32_t module_id; - uint32_t module_vendor; + u32 header_size_dw; + u32 header_version; + u32 module_id; + u32 module_vendor; union { struct { - uint8_t day; - uint8_t month; - uint16_t year; + u8 day; + u8 month; + u16 year; }; - uint32_t date; + u32 date; }; - uint32_t size_dw; /* uCode plus header_size_dw */ - uint32_t key_size_dw; - uint32_t modulus_size_dw; - uint32_t exponent_size_dw; + u32 size_dw; /* uCode plus header_size_dw */ + u32 key_size_dw; + u32 modulus_size_dw; + u32 exponent_size_dw; union { struct { - uint8_t hour; - uint8_t min; - uint16_t sec; + u8 hour; + u8 min; + u16 sec; }; - uint32_t time; + u32 time; }; char username[8]; char buildnumber[12]; union { struct { - uint32_t branch_client_version; - uint32_t sw_version; + u32 branch_client_version; + u32 sw_version; } guc; struct { - uint32_t sw_version; - uint32_t reserved; + u32 sw_version; + u32 reserved; } huc; }; - uint32_t prod_preprod_fw; - uint32_t reserved[12]; - uint32_t header_info; + u32 prod_preprod_fw; + u32 reserved[12]; + u32 header_info; } __packed; struct guc_doorbell_info { diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index c9e25be4db40..c7a800a3798d 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -386,10 +386,7 @@ int intel_guc_select_fw(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); - guc->fw.path = NULL; - guc->fw.fetch_status = INTEL_UC_FIRMWARE_NONE; - guc->fw.load_status = INTEL_UC_FIRMWARE_NONE; - guc->fw.type = INTEL_UC_FW_TYPE_GUC; + intel_uc_fw_init(&guc->fw, INTEL_UC_FW_TYPE_GUC); if (i915_modparams.guc_firmware_path) { guc->fw.path = i915_modparams.guc_firmware_path; diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index 6571d96704ad..76d3eb1e4614 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -21,8 +21,11 @@ * IN THE SOFTWARE. * */ + #include #include + +#include "intel_guc_log.h" #include "i915_drv.h" static void guc_log_capture_logs(struct intel_guc *guc); @@ -525,7 +528,8 @@ int intel_guc_log_create(struct intel_guc *guc) { struct i915_vma *vma; unsigned long offset; - uint32_t size, flags; + u32 flags; + u32 size; int ret; GEM_BUG_ON(guc->log.vma); diff --git a/drivers/gpu/drm/i915/intel_guc_log.h b/drivers/gpu/drm/i915/intel_guc_log.h new file mode 100644 index 000000000000..f512cf79339b --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc_log.h @@ -0,0 +1,59 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_GUC_LOG_H_ +#define _INTEL_GUC_LOG_H_ + +#include + +#include "intel_guc_fwif.h" + +struct drm_i915_private; +struct intel_guc; + +struct intel_guc_log { + u32 flags; + struct i915_vma *vma; + /* The runtime stuff gets created only when GuC logging gets enabled */ + struct { + void *buf_addr; + struct workqueue_struct *flush_wq; + struct work_struct flush_work; + struct rchan *relay_chan; + } runtime; + /* logging related stats */ + u32 capture_miss_count; + u32 flush_interrupt_count; + u32 prev_overflow_count[GUC_MAX_LOG_BUFFER]; + u32 total_overflow_count[GUC_MAX_LOG_BUFFER]; + u32 flush_count[GUC_MAX_LOG_BUFFER]; +}; + +int intel_guc_log_create(struct intel_guc *guc); +void intel_guc_log_destroy(struct intel_guc *guc); +int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val); +void i915_guc_log_register(struct drm_i915_private *dev_priv); +void i915_guc_log_unregister(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 8b4b53525422..4b4cf56d29ad 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -21,9 +21,11 @@ * IN THE SOFTWARE. * */ -#include + +#include + +#include "intel_huc.h" #include "i915_drv.h" -#include "intel_uc.h" /** * DOC: HuC Firmware @@ -150,10 +152,7 @@ void intel_huc_select_fw(struct intel_huc *huc) { struct drm_i915_private *dev_priv = huc_to_i915(huc); - huc->fw.path = NULL; - huc->fw.fetch_status = INTEL_UC_FIRMWARE_NONE; - huc->fw.load_status = INTEL_UC_FIRMWARE_NONE; - huc->fw.type = INTEL_UC_FW_TYPE_HUC; + intel_uc_fw_init(&huc->fw, INTEL_UC_FW_TYPE_HUC); if (i915_modparams.huc_firmware_path) { huc->fw.path = i915_modparams.huc_firmware_path; diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h new file mode 100644 index 000000000000..aaa38b9e5817 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_huc.h @@ -0,0 +1,41 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_HUC_H_ +#define _INTEL_HUC_H_ + +#include "intel_uc_fw.h" + +struct intel_huc { + /* Generic uC firmware management */ + struct intel_uc_fw fw; + + /* HuC-specific additions */ +}; + +void intel_huc_select_fw(struct intel_huc *huc); +void intel_huc_init_hw(struct intel_huc *huc); +void intel_huc_auth(struct intel_huc *huc); + +#endif diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 61cac26a8b05..fbfcf88d7fe3 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -208,8 +208,9 @@ /* Typical size of the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ - #define WA_TAIL_DWORDS 2 +#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS) +#define PREEMPT_ID 0x1 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); @@ -243,8 +244,7 @@ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enabl return 0; if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) && - USES_PPGTT(dev_priv) && - i915_modparams.use_mmio_flip >= 0) + USES_PPGTT(dev_priv)) return 1; return 0; @@ -348,6 +348,43 @@ find_priolist: return ptr_pack_bits(p, first, 1); } +static void unwind_wa_tail(struct drm_i915_gem_request *rq) +{ + rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); + assert_ring_tail_valid(rq->ring, rq->tail); +} + +static void unwind_incomplete_requests(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_request *rq, *rn; + struct i915_priolist *uninitialized_var(p); + int last_prio = I915_PRIORITY_INVALID; + + lockdep_assert_held(&engine->timeline->lock); + + list_for_each_entry_safe_reverse(rq, rn, + &engine->timeline->requests, + link) { + if (i915_gem_request_completed(rq)) + return; + + __i915_gem_request_unsubmit(rq); + unwind_wa_tail(rq); + + GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID); + if (rq->priotree.priority != last_prio) { + p = lookup_priolist(engine, + &rq->priotree, + rq->priotree.priority); + p = ptr_mask_bits(p, 1); + + last_prio = rq->priotree.priority; + } + + list_add(&rq->priotree.link, &p->requests); + } +} + static inline void execlists_context_status_change(struct drm_i915_gem_request *rq, unsigned long status) @@ -392,6 +429,12 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) return ce->lrc_desc; } +static inline void elsp_write(u64 desc, u32 __iomem *elsp) +{ + writel(upper_32_bits(desc), elsp); + writel(lower_32_bits(desc), elsp); +} + static void execlists_submit_ports(struct intel_engine_cs *engine) { struct execlist_port *port = engine->execlists.port; @@ -417,8 +460,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = 0; } - writel(upper_32_bits(desc), elsp); - writel(lower_32_bits(desc), elsp); + elsp_write(desc, elsp); } } @@ -451,26 +493,43 @@ static void port_assign(struct execlist_port *port, port_set(port, port_pack(i915_gem_request_get(rq), port_count(port))); } +static void inject_preempt_context(struct intel_engine_cs *engine) +{ + struct intel_context *ce = + &engine->i915->preempt_context->engine[engine->id]; + u32 __iomem *elsp = + engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + unsigned int n; + + GEM_BUG_ON(engine->i915->preempt_context->hw_id != PREEMPT_ID); + GEM_BUG_ON(!IS_ALIGNED(ce->ring->size, WA_TAIL_BYTES)); + + memset(ce->ring->vaddr + ce->ring->tail, 0, WA_TAIL_BYTES); + ce->ring->tail += WA_TAIL_BYTES; + ce->ring->tail &= (ce->ring->size - 1); + ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail; + + for (n = execlists_num_ports(&engine->execlists); --n; ) + elsp_write(0, elsp); + + elsp_write(ce->lrc_desc, elsp); +} + +static bool can_preempt(struct intel_engine_cs *engine) +{ + return INTEL_INFO(engine->i915)->has_logical_ring_preemption; +} + static void execlists_dequeue(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *last; struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; const struct execlist_port * const last_port = &execlists->port[execlists->port_mask]; + struct drm_i915_gem_request *last = port_request(port); struct rb_node *rb; bool submit = false; - last = port_request(port); - if (last) - /* WaIdleLiteRestore:bdw,skl - * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL - * as we resubmit the request. See gen8_emit_breadcrumb() - * for where we prepare the padding after the end of the - * request. - */ - last->tail = last->wa_tail; - /* Hardware submission is through 2 ports. Conceptually each port * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is * static for a context, and unique to each, so we only execute @@ -495,7 +554,65 @@ static void execlists_dequeue(struct intel_engine_cs *engine) spin_lock_irq(&engine->timeline->lock); rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); - while (rb) { + if (!rb) + goto unlock; + + if (last) { + /* + * Don't resubmit or switch until all outstanding + * preemptions (lite-restore) are seen. Then we + * know the next preemption status we see corresponds + * to this ELSP update. + */ + if (port_count(&port[0]) > 1) + goto unlock; + + if (can_preempt(engine) && + rb_entry(rb, struct i915_priolist, node)->priority > + max(last->priotree.priority, 0)) { + /* + * Switch to our empty preempt context so + * the state of the GPU is known (idle). + */ + inject_preempt_context(engine); + execlists->preempt = true; + goto unlock; + } else { + /* + * In theory, we could coalesce more requests onto + * the second port (the first port is active, with + * no preemptions pending). However, that means we + * then have to deal with the possible lite-restore + * of the second port (as we submit the ELSP, there + * may be a context-switch) but also we may complete + * the resubmission before the context-switch. Ergo, + * coalescing onto the second port will cause a + * preemption event, but we cannot predict whether + * that will affect port[0] or port[1]. + * + * If the second port is already active, we can wait + * until the next context-switch before contemplating + * new requests. The GPU will be busy and we should be + * able to resubmit the new ELSP before it idles, + * avoiding pipeline bubbles (momentary pauses where + * the driver is unable to keep up the supply of new + * work). + */ + if (port_count(&port[1])) + goto unlock; + + /* WaIdleLiteRestore:bdw,skl + * Apply the wa NOOPs to prevent + * ring:HEAD == req:TAIL as we resubmit the + * request. See gen8_emit_breadcrumb() for + * where we prepare the padding after the + * end of the request. + */ + last->tail = last->wa_tail; + } + } + + do { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); struct drm_i915_gem_request *rq, *rn; @@ -547,8 +664,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } INIT_LIST_HEAD(&rq->priotree.link); - rq->priotree.priority = INT_MAX; - __i915_gem_request_submit(rq); trace_i915_gem_request_in(rq, port_index(port, execlists)); last = rq; @@ -560,11 +675,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine) INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); - } + } while (rb); done: execlists->first = rb; if (submit) port_assign(port, last); +unlock: spin_unlock_irq(&engine->timeline->lock); if (submit) @@ -575,12 +691,12 @@ static void execlist_cancel_port_requests(struct intel_engine_execlists *execlists) { struct execlist_port *port = execlists->port; - unsigned int num_ports = ARRAY_SIZE(execlists->port); + unsigned int num_ports = execlists_num_ports(execlists); while (num_ports-- && port_isset(port)) { struct drm_i915_gem_request *rq = port_request(port); - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED); i915_gem_request_put(rq); memset(port, 0, sizeof(*port)); @@ -645,13 +761,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) spin_unlock_irqrestore(&engine->timeline->lock, flags); } -static bool execlists_elsp_ready(const struct intel_engine_cs *engine) -{ - const struct execlist_port *port = engine->execlists.port; - - return port_count(&port[0]) + port_count(&port[1]) < 2; -} - /* * Check the unread Context Status Buffers and manage the submission of new * contexts to the ELSP accordingly. @@ -660,7 +769,7 @@ static void intel_lrc_irq_handler(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; struct intel_engine_execlists * const execlists = &engine->execlists; - struct execlist_port *port = execlists->port; + struct execlist_port * const port = execlists->port; struct drm_i915_private *dev_priv = engine->i915; /* We can skip acquiring intel_runtime_pm_get() here as it was taken @@ -745,6 +854,23 @@ static void intel_lrc_irq_handler(unsigned long data) if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) continue; + if (status & GEN8_CTX_STATUS_ACTIVE_IDLE && + buf[2*head + 1] == PREEMPT_ID) { + execlist_cancel_port_requests(execlists); + + spin_lock_irq(&engine->timeline->lock); + unwind_incomplete_requests(engine); + spin_unlock_irq(&engine->timeline->lock); + + GEM_BUG_ON(!execlists->preempt); + execlists->preempt = false; + continue; + } + + if (status & GEN8_CTX_STATUS_PREEMPTED && + execlists->preempt) + continue; + /* Check the context/desc id for this event matches */ GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); @@ -756,6 +882,7 @@ static void intel_lrc_irq_handler(unsigned long data) execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); trace_i915_gem_request_out(rq); + rq->priotree.priority = INT_MAX; i915_gem_request_put(rq); execlists_port_complete(execlists, port); @@ -775,7 +902,7 @@ static void intel_lrc_irq_handler(unsigned long data) } } - if (execlists_elsp_ready(engine)) + if (!execlists->preempt) execlists_dequeue(engine); intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); @@ -788,7 +915,7 @@ static void insert_request(struct intel_engine_cs *engine, struct i915_priolist *p = lookup_priolist(engine, pt, prio); list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests); - if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(engine)) + if (ptr_unmask_bits(p, 1)) tasklet_hi_schedule(&engine->execlists.irq_tasklet); } @@ -808,11 +935,15 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) spin_unlock_irqrestore(&engine->timeline->lock, flags); } +static struct drm_i915_gem_request *pt_to_request(struct i915_priotree *pt) +{ + return container_of(pt, struct drm_i915_gem_request, priotree); +} + static struct intel_engine_cs * pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) { - struct intel_engine_cs *engine = - container_of(pt, struct drm_i915_gem_request, priotree)->engine; + struct intel_engine_cs *engine = pt_to_request(pt)->engine; GEM_BUG_ON(!locked); @@ -831,6 +962,8 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) struct i915_dependency stack; LIST_HEAD(dfs); + GEM_BUG_ON(prio == I915_PRIORITY_INVALID); + if (prio <= READ_ONCE(request->priotree.priority)) return; @@ -866,6 +999,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) * engines. */ list_for_each_entry(p, &pt->signalers_list, signal_link) { + if (i915_gem_request_completed(pt_to_request(p->signaler))) + continue; + GEM_BUG_ON(p->signaler->priority < pt->priority); if (prio > READ_ONCE(p->signaler->priority)) list_move_tail(&p->dfs_link, &dfs); @@ -879,7 +1015,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) * execlists_submit_request()), we can set our own priority and skip * acquiring the engine locks. */ - if (request->priotree.priority == INT_MIN) { + if (request->priotree.priority == I915_PRIORITY_INVALID) { GEM_BUG_ON(!list_empty(&request->priotree.link)); request->priotree.priority = prio; if (stack.dfs_link.next == stack.dfs_link.prev) @@ -909,8 +1045,6 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) } spin_unlock_irq(&engine->timeline->lock); - - /* XXX Do we need to preempt to make room for us and our deps? */ } static struct intel_ring * @@ -1106,6 +1240,8 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES); + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + /* Pad to end of cacheline */ while ((unsigned long)batch % CACHELINE_BYTES) *batch++ = MI_NOOP; @@ -1119,26 +1255,10 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) return batch; } -/* - * This batch is started immediately after indirect_ctx batch. Since we ensure - * that indirect_ctx ends on a cacheline this batch is aligned automatically. - * - * The number of DWORDS written are returned using this field. - * - * This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding - * to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant. - */ -static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch) -{ - /* WaDisableCtxRestoreArbitration:bdw,chv */ - *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - *batch++ = MI_BATCH_BUFFER_END; - - return batch; -} - static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) { + *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ batch = gen8_emit_flush_coherentl3_wa(engine, batch); @@ -1184,6 +1304,8 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) *batch++ = 0; } + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + /* Pad to end of cacheline */ while ((unsigned long)batch % CACHELINE_BYTES) *batch++ = MI_NOOP; @@ -1251,7 +1373,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) break; case 8: wa_bb_fn[0] = gen8_init_indirectctx_bb; - wa_bb_fn[1] = gen8_init_perctx_bb; + wa_bb_fn[1] = NULL; break; default: MISSING_CASE(INTEL_GEN(engine->i915)); @@ -1337,6 +1459,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); execlists->csb_head = -1; + execlists->preempt = false; /* After a GPU reset, we may have requests to replay */ if (!i915_modparams.enable_guc_submission && execlists->first) @@ -1382,7 +1505,6 @@ static void reset_common_ring(struct intel_engine_cs *engine, struct drm_i915_gem_request *request) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct drm_i915_gem_request *rq, *rn; struct intel_context *ce; unsigned long flags; @@ -1400,21 +1522,7 @@ static void reset_common_ring(struct intel_engine_cs *engine, execlist_cancel_port_requests(execlists); /* Push back any incomplete requests for replay after the reset. */ - list_for_each_entry_safe_reverse(rq, rn, - &engine->timeline->requests, link) { - struct i915_priolist *p; - - if (i915_gem_request_completed(rq)) - break; - - __i915_gem_request_unsubmit(rq); - - p = lookup_priolist(engine, - &rq->priotree, - rq->priotree.priority); - list_add(&rq->priotree.link, - &ptr_mask_bits(p, 1)->requests); - } + unwind_incomplete_requests(engine); spin_unlock_irqrestore(&engine->timeline->lock, flags); @@ -1451,10 +1559,7 @@ static void reset_common_ring(struct intel_engine_cs *engine, intel_ring_update_space(request->ring); /* Reset WaIdleLiteRestore:bdw,skl as well */ - request->tail = - intel_ring_wrap(request->ring, - request->wa_tail - WA_TAIL_DWORDS*sizeof(u32)); - assert_ring_tail_valid(request->ring, request->tail); + unwind_wa_tail(request); } static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) @@ -1513,13 +1618,31 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, if (IS_ERR(cs)) return PTR_ERR(cs); + /* + * WaDisableCtxRestoreArbitration:bdw,chv + * + * We don't need to perform MI_ARB_ENABLE as often as we do (in + * particular all the gen that do not need the w/a at all!), if we + * took care to make sure that on every switch into this context + * (both ordinary and for preemption) that arbitrartion was enabled + * we would be fine. However, there doesn't seem to be a downside to + * being paranoid and making sure it is set before each batch and + * every context-switch. + * + * Note that if we fail to enable arbitration before the request + * is complete, then we do not see the context-switch interrupt and + * the engine hangs (with RING_HEAD == RING_TAIL). + * + * That satisfies both the GPGPU w/a and our heavy-handed paranoia. + */ + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + /* FIXME(BDW): Address space and security selectors. */ *cs++ = MI_BATCH_BUFFER_START_GEN8 | (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)) | (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); *cs++ = lower_32_bits(offset); *cs++ = upper_32_bits(offset); - *cs++ = MI_NOOP; intel_ring_advance(req, cs); return 0; @@ -1648,7 +1771,8 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, */ static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs) { - *cs++ = MI_NOOP; + /* Ensure there's always at least one preemption point per-request. */ + *cs++ = MI_ARB_CHECK; *cs++ = MI_NOOP; request->wa_tail = intel_ring_offset(request, cs); } @@ -1669,7 +1793,6 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) gen8_emit_wa_tail(request, cs); } - static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, @@ -1697,7 +1820,6 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, gen8_emit_wa_tail(request, cs); } - static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS; static int gen8_init_rcs_context(struct drm_i915_gem_request *req) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 314adee7127a..689fde1a63a9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -61,6 +61,7 @@ enum { INTEL_CONTEXT_SCHEDULE_IN = 0, INTEL_CONTEXT_SCHEDULE_OUT, + INTEL_CONTEXT_SCHEDULE_PREEMPTED, }; /* Logical Rings */ diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index a55954a89148..38572d65e46e 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -939,10 +939,8 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) struct drm_display_mode *fixed_mode = NULL; struct drm_display_mode *downclock_mode = NULL; struct edid *edid; - struct intel_crtc *crtc; i915_reg_t lvds_reg; u32 lvds; - int pipe; u8 pin; u32 allowed_scalers; @@ -1113,22 +1111,11 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) * on. If so, assume that whatever is currently programmed is the * correct mode. */ - - /* Ironlake: FIXME if still fail, not try pipe mode now */ - if (HAS_PCH_SPLIT(dev_priv)) - goto failed; - - pipe = (lvds & LVDS_PIPEB_SELECT) ? 1 : 0; - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - - if (crtc && (lvds & LVDS_PORT_EN)) { - fixed_mode = intel_crtc_mode_get(dev, &crtc->base); - if (fixed_mode) { - DRM_DEBUG_KMS("using current (BIOS) mode: "); - drm_mode_debug_printmodeline(fixed_mode); - fixed_mode->type |= DRM_MODE_TYPE_PREFERRED; - goto out; - } + fixed_mode = intel_encoder_current_mode(intel_encoder); + if (fixed_mode) { + DRM_DEBUG_KMS("using current (BIOS) mode: "); + drm_mode_debug_printmodeline(fixed_mode); + fixed_mode->type |= DRM_MODE_TYPE_PREFERRED; } /* If we still don't have a mode after all that, give up. */ diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index 96043a51c1bf..899839f2f7c6 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -206,11 +206,11 @@ static const char *pipe_crc_source_name(enum intel_pipe_crc_source source) static int display_crc_ctl_show(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = m->private; - int i; + enum pipe pipe; - for (i = 0; i < I915_MAX_PIPES; i++) - seq_printf(m, "%c %s\n", pipe_name(i), - pipe_crc_source_name(dev_priv->pipe_crc[i].source)); + for_each_pipe(dev_priv, pipe) + seq_printf(m, "%c %s\n", pipe_name(pipe), + pipe_crc_source_name(dev_priv->pipe_crc[pipe].source)); return 0; } @@ -775,11 +775,12 @@ display_crc_ctl_parse_object(const char *buf, enum intel_pipe_crc_object *o) return -EINVAL; } -static int display_crc_ctl_parse_pipe(const char *buf, enum pipe *pipe) +static int display_crc_ctl_parse_pipe(struct drm_i915_private *dev_priv, + const char *buf, enum pipe *pipe) { const char name = buf[0]; - if (name < 'A' || name >= pipe_name(I915_MAX_PIPES)) + if (name < 'A' || name >= pipe_name(INTEL_INFO(dev_priv)->num_pipes)) return -EINVAL; *pipe = name - 'A'; @@ -828,7 +829,7 @@ static int display_crc_ctl_parse(struct drm_i915_private *dev_priv, return -EINVAL; } - if (display_crc_ctl_parse_pipe(words[1], &pipe) < 0) { + if (display_crc_ctl_parse_pipe(dev_priv, words[1], &pipe) < 0) { DRM_DEBUG_DRIVER("unknown pipe %s\n", words[1]); return -EINVAL; } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c66af09e27a7..2fcff9788b6f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -322,7 +322,7 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable) { u32 val; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); if (enable) @@ -337,14 +337,14 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable) FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) DRM_ERROR("timed out waiting for Punit DDR DVFS request\n"); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable) { u32 val; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); if (enable) @@ -353,7 +353,7 @@ static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable) val &= ~DSP_MAXFIFO_PM5_ENABLE; vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } #define FW_WM(value, plane) \ @@ -2790,11 +2790,11 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, /* read the first set of memory latencies[0:3] */ val = 0; /* data0 to be programmed to 0 for first set */ - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_READ_MEM_LATENCY, &val); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("SKL Mailbox read error = %d\n", ret); @@ -2811,11 +2811,11 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, /* read the second set of memory latencies[4:7] */ val = 1; /* data0 to be programmed to 1 for second set */ - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_READ_MEM_LATENCY, &val); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("SKL Mailbox read error = %d\n", ret); return; @@ -3608,13 +3608,13 @@ intel_enable_sagv(struct drm_i915_private *dev_priv) return 0; DRM_DEBUG_KMS("Enabling the SAGV\n"); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL, GEN9_SAGV_ENABLE); /* We don't need to wait for the SAGV when enabling */ - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); /* * Some skl systems, pre-release machines in particular, @@ -3645,14 +3645,14 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) return 0; DRM_DEBUG_KMS("Disabling the SAGV\n"); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); /* bspec says to keep retrying for at least 1 ms */ ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL, GEN9_SAGV_DISABLE, GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED, 1); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); /* * Some skl systems, pre-release machines in particular, @@ -4820,16 +4820,18 @@ static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a, return a->start < b->end && b->start < a->end; } -bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries, +bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv, + const struct skl_ddb_entry **entries, const struct skl_ddb_entry *ddb, int ignore) { - int i; + enum pipe pipe; - for (i = 0; i < I915_MAX_PIPES; i++) - if (i != ignore && entries[i] && - skl_ddb_entries_overlap(ddb, entries[i])) + for_each_pipe(dev_priv, pipe) { + if (pipe != ignore && entries[pipe] && + skl_ddb_entries_overlap(ddb, entries[pipe])) return true; + } return false; } @@ -5619,7 +5621,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev) wm->level = VLV_WM_LEVEL_PM2; if (IS_CHERRYVIEW(dev_priv)) { - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); if (val & DSP_MAXFIFO_PM5_ENABLE) @@ -5649,7 +5651,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev) wm->level = VLV_WM_LEVEL_DDR_DVFS; } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } for_each_intel_crtc(dev, crtc) { @@ -5827,6 +5829,12 @@ void intel_enable_ipc(struct drm_i915_private *dev_priv) { u32 val; + /* Display WA #0477 WaDisableIPC: skl */ + if (IS_SKYLAKE(dev_priv)) { + dev_priv->ipc_enabled = false; + return; + } + val = I915_READ(DISP_ARB_CTL2); if (dev_priv->ipc_enabled) @@ -5980,6 +5988,7 @@ static void ironlake_disable_drps(struct drm_i915_private *dev_priv) */ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 limits; /* Only set the down limit when we've reached the lowest level to avoid @@ -5989,13 +5998,13 @@ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) * frequency, if the down threshold expires in that window we will not * receive a down interrupt. */ if (INTEL_GEN(dev_priv) >= 9) { - limits = (dev_priv->rps.max_freq_softlimit) << 23; - if (val <= dev_priv->rps.min_freq_softlimit) - limits |= (dev_priv->rps.min_freq_softlimit) << 14; + limits = (rps->max_freq_softlimit) << 23; + if (val <= rps->min_freq_softlimit) + limits |= (rps->min_freq_softlimit) << 14; } else { - limits = dev_priv->rps.max_freq_softlimit << 24; - if (val <= dev_priv->rps.min_freq_softlimit) - limits |= dev_priv->rps.min_freq_softlimit << 16; + limits = rps->max_freq_softlimit << 24; + if (val <= rps->min_freq_softlimit) + limits |= rps->min_freq_softlimit << 16; } return limits; @@ -6003,39 +6012,40 @@ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; int new_power; u32 threshold_up = 0, threshold_down = 0; /* in % */ u32 ei_up = 0, ei_down = 0; - new_power = dev_priv->rps.power; - switch (dev_priv->rps.power) { + new_power = rps->power; + switch (rps->power) { case LOW_POWER: - if (val > dev_priv->rps.efficient_freq + 1 && - val > dev_priv->rps.cur_freq) + if (val > rps->efficient_freq + 1 && + val > rps->cur_freq) new_power = BETWEEN; break; case BETWEEN: - if (val <= dev_priv->rps.efficient_freq && - val < dev_priv->rps.cur_freq) + if (val <= rps->efficient_freq && + val < rps->cur_freq) new_power = LOW_POWER; - else if (val >= dev_priv->rps.rp0_freq && - val > dev_priv->rps.cur_freq) + else if (val >= rps->rp0_freq && + val > rps->cur_freq) new_power = HIGH_POWER; break; case HIGH_POWER: - if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && - val < dev_priv->rps.cur_freq) + if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && + val < rps->cur_freq) new_power = BETWEEN; break; } /* Max/min bins are special */ - if (val <= dev_priv->rps.min_freq_softlimit) + if (val <= rps->min_freq_softlimit) new_power = LOW_POWER; - if (val >= dev_priv->rps.max_freq_softlimit) + if (val >= rps->max_freq_softlimit) new_power = HIGH_POWER; - if (new_power == dev_priv->rps.power) + if (new_power == rps->power) return; /* Note the units here are not exactly 1us, but 1280ns. */ @@ -6098,20 +6108,21 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) GEN6_RP_DOWN_IDLE_AVG); skip_hw_write: - dev_priv->rps.power = new_power; - dev_priv->rps.up_threshold = threshold_up; - dev_priv->rps.down_threshold = threshold_down; - dev_priv->rps.last_adj = 0; + rps->power = new_power; + rps->up_threshold = threshold_up; + rps->down_threshold = threshold_down; + rps->last_adj = 0; } static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 mask = 0; /* We use UP_EI_EXPIRED interupts for both up/down in manual mode */ - if (val > dev_priv->rps.min_freq_softlimit) + if (val > rps->min_freq_softlimit) mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; - if (val < dev_priv->rps.max_freq_softlimit) + if (val < rps->max_freq_softlimit) mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; mask &= dev_priv->pm_rps_events; @@ -6124,10 +6135,12 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* min/max delay may still have been modified so be sure to * write the limits value. */ - if (val != dev_priv->rps.cur_freq) { + if (val != rps->cur_freq) { gen6_set_rps_thresholds(dev_priv, val); if (INTEL_GEN(dev_priv) >= 9) @@ -6149,7 +6162,7 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val)); I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - dev_priv->rps.cur_freq = val; + rps->cur_freq = val; trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); return 0; @@ -6165,7 +6178,7 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - if (val != dev_priv->rps.cur_freq) { + if (val != dev_priv->gt_pm.rps.cur_freq) { err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); if (err) return err; @@ -6173,7 +6186,7 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) gen6_set_rps_thresholds(dev_priv, val); } - dev_priv->rps.cur_freq = val; + dev_priv->gt_pm.rps.cur_freq = val; trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); return 0; @@ -6188,10 +6201,11 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) */ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) { - u32 val = dev_priv->rps.idle_freq; + struct intel_rps *rps = &dev_priv->gt_pm.rps; + u32 val = rps->idle_freq; int err; - if (dev_priv->rps.cur_freq <= val) + if (rps->cur_freq <= val) return; /* The punit delays the write of the frequency and voltage until it @@ -6216,34 +6230,38 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) void gen6_rps_busy(struct drm_i915_private *dev_priv) { - mutex_lock(&dev_priv->rps.hw_lock); - if (dev_priv->rps.enabled) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + mutex_lock(&dev_priv->pcu_lock); + if (rps->enabled) { u8 freq; if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED) gen6_rps_reset_ei(dev_priv); I915_WRITE(GEN6_PMINTRMSK, - gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); + gen6_rps_pm_mask(dev_priv, rps->cur_freq)); gen6_enable_rps_interrupts(dev_priv); /* Use the user's desired frequency as a guide, but for better * performance, jump directly to RPe as our starting frequency. */ - freq = max(dev_priv->rps.cur_freq, - dev_priv->rps.efficient_freq); + freq = max(rps->cur_freq, + rps->efficient_freq); if (intel_set_rps(dev_priv, clamp(freq, - dev_priv->rps.min_freq_softlimit, - dev_priv->rps.max_freq_softlimit))) + rps->min_freq_softlimit, + rps->max_freq_softlimit))) DRM_DEBUG_DRIVER("Failed to set idle frequency\n"); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } void gen6_rps_idle(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* Flush our bottom-half so that it does not race with us * setting the idle frequency and so that it is bounded by * our rpm wakeref. And then disable the interrupts to stop any @@ -6251,36 +6269,36 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) */ gen6_disable_rps_interrupts(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); - if (dev_priv->rps.enabled) { + mutex_lock(&dev_priv->pcu_lock); + if (rps->enabled) { if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_set_rps_idle(dev_priv); else - gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); - dev_priv->rps.last_adj = 0; + gen6_set_rps(dev_priv, rps->idle_freq); + rps->last_adj = 0; I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0)); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } void gen6_rps_boost(struct drm_i915_gem_request *rq, - struct intel_rps_client *rps) + struct intel_rps_client *rps_client) { - struct drm_i915_private *i915 = rq->i915; + struct intel_rps *rps = &rq->i915->gt_pm.rps; unsigned long flags; bool boost; /* This is intentionally racy! We peek at the state here, then * validate inside the RPS worker. */ - if (!i915->rps.enabled) + if (!rps->enabled) return; boost = false; spin_lock_irqsave(&rq->lock, flags); if (!rq->waitboost && !i915_gem_request_completed(rq)) { - atomic_inc(&i915->rps.num_waiters); + atomic_inc(&rps->num_waiters); rq->waitboost = true; boost = true; } @@ -6288,22 +6306,23 @@ void gen6_rps_boost(struct drm_i915_gem_request *rq, if (!boost) return; - if (READ_ONCE(i915->rps.cur_freq) < i915->rps.boost_freq) - schedule_work(&i915->rps.work); + if (READ_ONCE(rps->cur_freq) < rps->boost_freq) + schedule_work(&rps->work); - atomic_inc(rps ? &rps->boosts : &i915->rps.boosts); + atomic_inc(rps_client ? &rps_client->boosts : &rps->boosts); } int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; int err; - lockdep_assert_held(&dev_priv->rps.hw_lock); - GEM_BUG_ON(val > dev_priv->rps.max_freq); - GEM_BUG_ON(val < dev_priv->rps.min_freq); + lockdep_assert_held(&dev_priv->pcu_lock); + GEM_BUG_ON(val > rps->max_freq); + GEM_BUG_ON(val < rps->min_freq); - if (!dev_priv->rps.enabled) { - dev_priv->rps.cur_freq = val; + if (!rps->enabled) { + rps->cur_freq = val; return 0; } @@ -6326,21 +6345,30 @@ static void gen9_disable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RP_CONTROL, 0); } -static void gen6_disable_rps(struct drm_i915_private *dev_priv) +static void gen6_disable_rc6(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RC_CONTROL, 0); +} + +static void gen6_disable_rps(struct drm_i915_private *dev_priv) +{ I915_WRITE(GEN6_RPNSWREQ, 1 << 31); I915_WRITE(GEN6_RP_CONTROL, 0); } -static void cherryview_disable_rps(struct drm_i915_private *dev_priv) +static void cherryview_disable_rc6(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RC_CONTROL, 0); } -static void valleyview_disable_rps(struct drm_i915_private *dev_priv) +static void cherryview_disable_rps(struct drm_i915_private *dev_priv) { - /* we're doing forcewake before Disabling RC6, + I915_WRITE(GEN6_RP_CONTROL, 0); +} + +static void valleyview_disable_rc6(struct drm_i915_private *dev_priv) +{ + /* We're doing forcewake before Disabling RC6, * This what the BIOS expects when going into suspend */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -6349,6 +6377,11 @@ static void valleyview_disable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } +static void valleyview_disable_rps(struct drm_i915_private *dev_priv) +{ + I915_WRITE(GEN6_RP_CONTROL, 0); +} + static void intel_print_rc6_info(struct drm_i915_private *dev_priv, u32 mode) { if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { @@ -6471,24 +6504,26 @@ int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6) static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* All of these values are in units of 50MHz */ /* static values from HW: RP0 > RP1 > RPn (min_freq) */ if (IS_GEN9_LP(dev_priv)) { u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP); - dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff; - dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; - dev_priv->rps.min_freq = (rp_state_cap >> 0) & 0xff; + rps->rp0_freq = (rp_state_cap >> 16) & 0xff; + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; + rps->min_freq = (rp_state_cap >> 0) & 0xff; } else { u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); - dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff; - dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; - dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff; + rps->rp0_freq = (rp_state_cap >> 0) & 0xff; + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; + rps->min_freq = (rp_state_cap >> 16) & 0xff; } /* hw_max = RP0 until we check for overclocking */ - dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; + rps->max_freq = rps->rp0_freq; - dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; + rps->efficient_freq = rps->rp1_freq; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { u32 ddcc_status = 0; @@ -6496,33 +6531,34 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) if (sandybridge_pcode_read(dev_priv, HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, &ddcc_status) == 0) - dev_priv->rps.efficient_freq = + rps->efficient_freq = clamp_t(u8, ((ddcc_status >> 8) & 0xff), - dev_priv->rps.min_freq, - dev_priv->rps.max_freq); + rps->min_freq, + rps->max_freq); } if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { /* Store the frequency values in 16.66 MHZ units, which is * the natural hardware unit for SKL */ - dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER; - dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER; - dev_priv->rps.min_freq *= GEN9_FREQ_SCALER; - dev_priv->rps.max_freq *= GEN9_FREQ_SCALER; - dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER; + rps->rp0_freq *= GEN9_FREQ_SCALER; + rps->rp1_freq *= GEN9_FREQ_SCALER; + rps->min_freq *= GEN9_FREQ_SCALER; + rps->max_freq *= GEN9_FREQ_SCALER; + rps->efficient_freq *= GEN9_FREQ_SCALER; } } static void reset_rps(struct drm_i915_private *dev_priv, int (*set)(struct drm_i915_private *, u8)) { - u8 freq = dev_priv->rps.cur_freq; + struct intel_rps *rps = &dev_priv->gt_pm.rps; + u8 freq = rps->cur_freq; /* force a reset */ - dev_priv->rps.power = -1; - dev_priv->rps.cur_freq = -1; + rps->power = -1; + rps->cur_freq = -1; if (set(dev_priv, freq)) DRM_ERROR("Failed to reset RPS to initial values\n"); @@ -6535,7 +6571,7 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv) /* Program defaults and thresholds for RPS*/ I915_WRITE(GEN6_RC_VIDEO_FREQ, - GEN9_FREQUENCY(dev_priv->rps.rp1_freq)); + GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq)); /* 1 second timeout*/ I915_WRITE(GEN6_RP_DOWN_TIMEOUT, @@ -6589,7 +6625,7 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25); /* 3a: Enable RC6 */ - if (intel_enable_rc6() & INTEL_RC6_ENABLE) + if (intel_rc6_enabled() & INTEL_RC6_ENABLE) rc6_mask = GEN6_RC_CTL_RC6_ENABLE; DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE)); I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ @@ -6609,7 +6645,7 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static void gen8_enable_rps(struct drm_i915_private *dev_priv) +static void gen8_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -6618,7 +6654,7 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) /* 1a: Software RC state - RC0 */ I915_WRITE(GEN6_RC_STATE, 0); - /* 1c & 1d: Get forcewake during program sequence. Although the driver + /* 1b: Get forcewake during program sequence. Although the driver * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -6632,36 +6668,38 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); I915_WRITE(GEN6_RC_SLEEP, 0); - if (IS_BROADWELL(dev_priv)) - I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ - else - I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ /* 3: Enable RC6 */ - if (intel_enable_rc6() & INTEL_RC6_ENABLE) + if (intel_rc6_enabled() & INTEL_RC6_ENABLE) rc6_mask = GEN6_RC_CTL_RC6_ENABLE; intel_print_rc6_info(dev_priv, rc6_mask); - if (IS_BROADWELL(dev_priv)) - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN7_RC_CTL_TO_MODE | - rc6_mask); - else - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_EI_MODE(1) | - rc6_mask); - /* 4 Program defaults and thresholds for RPS*/ + I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | + GEN7_RC_CTL_TO_MODE | + rc6_mask); + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); +} + +static void gen8_enable_rps(struct drm_i915_private *dev_priv) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + /* 1 Program defaults and thresholds for RPS*/ I915_WRITE(GEN6_RPNSWREQ, - HSW_FREQUENCY(dev_priv->rps.rp1_freq)); + HSW_FREQUENCY(rps->rp1_freq)); I915_WRITE(GEN6_RC_VIDEO_FREQ, - HSW_FREQUENCY(dev_priv->rps.rp1_freq)); + HSW_FREQUENCY(rps->rp1_freq)); /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ /* Docs recommend 900MHz, and 300 MHz respectively */ I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, - dev_priv->rps.max_freq_softlimit << 24 | - dev_priv->rps.min_freq_softlimit << 16); + rps->max_freq_softlimit << 24 | + rps->min_freq_softlimit << 16); I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/ @@ -6670,7 +6708,7 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - /* 5: Enable RPS */ + /* 2: Enable RPS */ I915_WRITE(GEN6_RP_CONTROL, GEN6_RP_MEDIA_TURBO | GEN6_RP_MEDIA_HW_NORMAL_MODE | @@ -6679,14 +6717,12 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_AVG); - /* 6: Ring frequency + overclocking (our driver does this later */ - reset_rps(dev_priv, gen6_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static void gen6_enable_rps(struct drm_i915_private *dev_priv) +static void gen6_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -6695,14 +6731,6 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) int rc6_mode; int ret; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - - /* Here begins a magic sequence of register writes to enable - * auto-downclocking. - * - * Perhaps there might be some value in exposing these to - * userspace... - */ I915_WRITE(GEN6_RC_STATE, 0); /* Clear the DBG now so we don't confuse earlier errors */ @@ -6736,7 +6764,7 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ /* Check if we are enabling RC6 */ - rc6_mode = intel_enable_rc6(); + rc6_mode = intel_rc6_enabled(); if (rc6_mode & INTEL_RC6_ENABLE) rc6_mask |= GEN6_RC_CTL_RC6_ENABLE; @@ -6756,12 +6784,6 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) GEN6_RC_CTL_EI_MODE(1) | GEN6_RC_CTL_HW_ENABLE); - /* Power down if completely idle for over 50ms */ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - - reset_rps(dev_priv, gen6_set_rps); - rc6vids = 0; ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); if (IS_GEN6(dev_priv) && ret) { @@ -6779,8 +6801,28 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } +static void gen6_enable_rps(struct drm_i915_private *dev_priv) +{ + /* Here begins a magic sequence of register writes to enable + * auto-downclocking. + * + * Perhaps there might be some value in exposing these to + * userspace... + */ + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + /* Power down if completely idle for over 50ms */ + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + + reset_rps(dev_priv, gen6_set_rps); + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); +} + static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; int min_freq = 15; unsigned int gpu_freq; unsigned int max_ia_freq, min_ring_freq; @@ -6788,7 +6830,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) int scaling_factor = 180; struct cpufreq_policy *policy; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); policy = cpufreq_cpu_get(0); if (policy) { @@ -6811,11 +6853,11 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { /* Convert GT frequency to 50 HZ units */ - min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER; - max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER; + min_gpu_freq = rps->min_freq / GEN9_FREQ_SCALER; + max_gpu_freq = rps->max_freq / GEN9_FREQ_SCALER; } else { - min_gpu_freq = dev_priv->rps.min_freq; - max_gpu_freq = dev_priv->rps.max_freq; + min_gpu_freq = rps->min_freq; + max_gpu_freq = rps->max_freq; } /* @@ -7066,17 +7108,18 @@ static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv) static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv) { - dev_priv->rps.gpll_ref_freq = + dev_priv->gt_pm.rps.gpll_ref_freq = vlv_get_cck_clock(dev_priv, "GPLL ref", CCK_GPLL_CLOCK_CONTROL, dev_priv->czclk_freq); DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", - dev_priv->rps.gpll_ref_freq); + dev_priv->gt_pm.rps.gpll_ref_freq); } static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; valleyview_setup_pctx(dev_priv); @@ -7098,30 +7141,31 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) } DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); - dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv); - dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; + rps->max_freq = valleyview_rps_max_freq(dev_priv); + rps->rp0_freq = rps->max_freq; DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq), - dev_priv->rps.max_freq); + intel_gpu_freq(dev_priv, rps->max_freq), + rps->max_freq); - dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv); + rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv); DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), - dev_priv->rps.efficient_freq); + intel_gpu_freq(dev_priv, rps->efficient_freq), + rps->efficient_freq); - dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv); + rps->rp1_freq = valleyview_rps_guar_freq(dev_priv); DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), - dev_priv->rps.rp1_freq); + intel_gpu_freq(dev_priv, rps->rp1_freq), + rps->rp1_freq); - dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv); + rps->min_freq = valleyview_rps_min_freq(dev_priv); DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), - dev_priv->rps.min_freq); + intel_gpu_freq(dev_priv, rps->min_freq), + rps->min_freq); } static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; cherryview_setup_pctx(dev_priv); @@ -7142,31 +7186,29 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) } DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); - dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv); - dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; + rps->max_freq = cherryview_rps_max_freq(dev_priv); + rps->rp0_freq = rps->max_freq; DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq), - dev_priv->rps.max_freq); + intel_gpu_freq(dev_priv, rps->max_freq), + rps->max_freq); - dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv); + rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv); DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), - dev_priv->rps.efficient_freq); + intel_gpu_freq(dev_priv, rps->efficient_freq), + rps->efficient_freq); - dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv); + rps->rp1_freq = cherryview_rps_guar_freq(dev_priv); DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), - dev_priv->rps.rp1_freq); + intel_gpu_freq(dev_priv, rps->rp1_freq), + rps->rp1_freq); - dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv); + rps->min_freq = cherryview_rps_min_freq(dev_priv); DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), - dev_priv->rps.min_freq); + intel_gpu_freq(dev_priv, rps->min_freq), + rps->min_freq); - WARN_ONCE((dev_priv->rps.max_freq | - dev_priv->rps.efficient_freq | - dev_priv->rps.rp1_freq | - dev_priv->rps.min_freq) & 1, + WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq | + rps->min_freq) & 1, "Odd GPU freq values\n"); } @@ -7175,13 +7217,11 @@ static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv) valleyview_cleanup_pctx(dev_priv); } -static void cherryview_enable_rps(struct drm_i915_private *dev_priv) +static void cherryview_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; - u32 gtfifodbg, val, rc6_mode = 0, pcbr; - - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + u32 gtfifodbg, rc6_mode = 0, pcbr; gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV | GT_FIFO_FREE_ENTRIES_CHV); @@ -7212,7 +7252,7 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) /* TO threshold set to 500 us ( 0x186 * 1.28 us) */ I915_WRITE(GEN6_RC6_THRESHOLD, 0x186); - /* allows RC6 residency counter to work */ + /* Allows RC6 residency counter to work */ I915_WRITE(VLV_COUNTER_CONTROL, _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | VLV_MEDIA_RC6_COUNT_EN | @@ -7222,13 +7262,22 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) pcbr = I915_READ(VLV_PCBR); /* 3: Enable RC6 */ - if ((intel_enable_rc6() & INTEL_RC6_ENABLE) && + if ((intel_rc6_enabled() & INTEL_RC6_ENABLE) && (pcbr >> VLV_PCBR_ADDR_SHIFT)) rc6_mode = GEN7_RC_CTL_TO_MODE; I915_WRITE(GEN6_RC_CONTROL, rc6_mode); - /* 4 Program defaults and thresholds for RPS*/ + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); +} + +static void cherryview_enable_rps(struct drm_i915_private *dev_priv) +{ + u32 val; + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + /* 1: Program defaults and thresholds for RPS*/ I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); @@ -7237,7 +7286,7 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - /* 5: Enable RPS */ + /* 2: Enable RPS */ I915_WRITE(GEN6_RP_CONTROL, GEN6_RP_MEDIA_HW_NORMAL_MODE | GEN6_RP_MEDIA_IS_GFX | @@ -7264,13 +7313,11 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static void valleyview_enable_rps(struct drm_i915_private *dev_priv) +static void valleyview_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; - u32 gtfifodbg, val, rc6_mode = 0; - - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + u32 gtfifodbg, rc6_mode = 0; valleyview_check_pctx(dev_priv); @@ -7281,12 +7328,44 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GTFIFODBG, gtfifodbg); } - /* If VLV, Forcewake all wells, else re-direct to regular path */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); /* Disable RC states. */ I915_WRITE(GEN6_RC_CONTROL, 0); + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); + I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); + I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); + + for_each_engine(engine, dev_priv, id) + I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); + + I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); + + /* Allows RC6 residency counter to work */ + I915_WRITE(VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | + VLV_MEDIA_RC0_COUNT_EN | + VLV_RENDER_RC0_COUNT_EN | + VLV_MEDIA_RC6_COUNT_EN | + VLV_RENDER_RC6_COUNT_EN)); + + if (intel_rc6_enabled() & INTEL_RC6_ENABLE) + rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; + + intel_print_rc6_info(dev_priv, rc6_mode); + + I915_WRITE(GEN6_RC_CONTROL, rc6_mode); + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); +} + +static void valleyview_enable_rps(struct drm_i915_private *dev_priv) +{ + u32 val; + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); @@ -7303,30 +7382,6 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_CONT); - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); - - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - - I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); - - /* allows RC6 residency counter to work */ - I915_WRITE(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | - VLV_MEDIA_RC0_COUNT_EN | - VLV_RENDER_RC0_COUNT_EN | - VLV_MEDIA_RC6_COUNT_EN | - VLV_RENDER_RC6_COUNT_EN)); - - if (intel_enable_rc6() & INTEL_RC6_ENABLE) - rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; - - intel_print_rc6_info(dev_priv, rc6_mode); - - I915_WRITE(GEN6_RC_CONTROL, rc6_mode); - /* Setting Fixed Bias */ val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | @@ -7534,7 +7589,7 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) lockdep_assert_held(&mchdev_lock); - pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq)); + pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq)); pxvid = (pxvid >> 24) & 0x7f; ext_v = pvid_to_extvid(dev_priv, pxvid); @@ -7821,6 +7876,8 @@ static void intel_init_emon(struct drm_i915_private *dev_priv) void intel_init_gt_powersave(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* * RPM depends on RC6 to save restore the GT HW context, so make RC6 a * requirement. @@ -7831,7 +7888,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) } mutex_lock(&dev_priv->drm.struct_mutex); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); /* Initialize RPS limits (for userspace) */ if (IS_CHERRYVIEW(dev_priv)) @@ -7842,16 +7899,16 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) gen6_init_rps_frequencies(dev_priv); /* Derive initial user preferences/limits from the hardware limits */ - dev_priv->rps.idle_freq = dev_priv->rps.min_freq; - dev_priv->rps.cur_freq = dev_priv->rps.idle_freq; + rps->idle_freq = rps->min_freq; + rps->cur_freq = rps->idle_freq; - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; - dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; + rps->max_freq_softlimit = rps->max_freq; + rps->min_freq_softlimit = rps->min_freq; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - dev_priv->rps.min_freq_softlimit = + rps->min_freq_softlimit = max_t(int, - dev_priv->rps.efficient_freq, + rps->efficient_freq, intel_freq_opcode(dev_priv, 450)); /* After setting max-softlimit, find the overclock max freq */ @@ -7862,16 +7919,16 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, ¶ms); if (params & BIT(31)) { /* OC supported */ DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", - (dev_priv->rps.max_freq & 0xff) * 50, + (rps->max_freq & 0xff) * 50, (params & 0xff) * 50); - dev_priv->rps.max_freq = params & 0xff; + rps->max_freq = params & 0xff; } } /* Finally allow us to boost to max by default */ - dev_priv->rps.boost_freq = dev_priv->rps.max_freq; + rps->boost_freq = rps->max_freq; - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); mutex_unlock(&dev_priv->drm.struct_mutex); intel_autoenable_gt_powersave(dev_priv); @@ -7899,7 +7956,7 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) < 6) return; - if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work)) + if (cancel_delayed_work_sync(&dev_priv->gt_pm.autoenable_work)) intel_runtime_pm_put(dev_priv); /* gen6_rps_idle() will be called later to disable interrupts */ @@ -7907,90 +7964,168 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv) void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) { - dev_priv->rps.enabled = true; /* force disabling */ + dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */ + dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */ intel_disable_gt_powersave(dev_priv); gen6_reset_rps_interrupts(dev_priv); } -void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) +static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) { - if (!READ_ONCE(dev_priv->rps.enabled)) + lockdep_assert_held(&i915->pcu_lock); + + if (!i915->gt_pm.llc_pstate.enabled) return; - mutex_lock(&dev_priv->rps.hw_lock); + /* Currently there is no HW configuration to be done to disable. */ - if (INTEL_GEN(dev_priv) >= 9) { - gen9_disable_rc6(dev_priv); - gen9_disable_rps(dev_priv); - } else if (IS_CHERRYVIEW(dev_priv)) { - cherryview_disable_rps(dev_priv); - } else if (IS_VALLEYVIEW(dev_priv)) { - valleyview_disable_rps(dev_priv); - } else if (INTEL_GEN(dev_priv) >= 6) { - gen6_disable_rps(dev_priv); - } else if (IS_IRONLAKE_M(dev_priv)) { - ironlake_disable_drps(dev_priv); - } - - dev_priv->rps.enabled = false; - mutex_unlock(&dev_priv->rps.hw_lock); + i915->gt_pm.llc_pstate.enabled = false; } -void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) +static void intel_disable_rc6(struct drm_i915_private *dev_priv) { - /* We shouldn't be disabling as we submit, so this should be less - * racy than it appears! - */ - if (READ_ONCE(dev_priv->rps.enabled)) + lockdep_assert_held(&dev_priv->pcu_lock); + + if (!dev_priv->gt_pm.rc6.enabled) return; - /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(dev_priv)) + if (INTEL_GEN(dev_priv) >= 9) + gen9_disable_rc6(dev_priv); + else if (IS_CHERRYVIEW(dev_priv)) + cherryview_disable_rc6(dev_priv); + else if (IS_VALLEYVIEW(dev_priv)) + valleyview_disable_rc6(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_disable_rc6(dev_priv); + + dev_priv->gt_pm.rc6.enabled = false; +} + +static void intel_disable_rps(struct drm_i915_private *dev_priv) +{ + lockdep_assert_held(&dev_priv->pcu_lock); + + if (!dev_priv->gt_pm.rps.enabled) return; - mutex_lock(&dev_priv->rps.hw_lock); + if (INTEL_GEN(dev_priv) >= 9) + gen9_disable_rps(dev_priv); + else if (IS_CHERRYVIEW(dev_priv)) + cherryview_disable_rps(dev_priv); + else if (IS_VALLEYVIEW(dev_priv)) + valleyview_disable_rps(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_disable_rps(dev_priv); + else if (IS_IRONLAKE_M(dev_priv)) + ironlake_disable_drps(dev_priv); + + dev_priv->gt_pm.rps.enabled = false; +} + +void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) +{ + mutex_lock(&dev_priv->pcu_lock); + + intel_disable_rc6(dev_priv); + intel_disable_rps(dev_priv); + if (HAS_LLC(dev_priv)) + intel_disable_llc_pstate(dev_priv); + + mutex_unlock(&dev_priv->pcu_lock); +} + +static inline void intel_enable_llc_pstate(struct drm_i915_private *i915) +{ + lockdep_assert_held(&i915->pcu_lock); + + if (i915->gt_pm.llc_pstate.enabled) + return; + + gen6_update_ring_freq(i915); + + i915->gt_pm.llc_pstate.enabled = true; +} + +static void intel_enable_rc6(struct drm_i915_private *dev_priv) +{ + lockdep_assert_held(&dev_priv->pcu_lock); + + if (dev_priv->gt_pm.rc6.enabled) + return; + + if (IS_CHERRYVIEW(dev_priv)) + cherryview_enable_rc6(dev_priv); + else if (IS_VALLEYVIEW(dev_priv)) + valleyview_enable_rc6(dev_priv); + else if (INTEL_GEN(dev_priv) >= 9) + gen9_enable_rc6(dev_priv); + else if (IS_BROADWELL(dev_priv)) + gen8_enable_rc6(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_enable_rc6(dev_priv); + + dev_priv->gt_pm.rc6.enabled = true; +} + +static void intel_enable_rps(struct drm_i915_private *dev_priv) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + lockdep_assert_held(&dev_priv->pcu_lock); + + if (rps->enabled) + return; if (IS_CHERRYVIEW(dev_priv)) { cherryview_enable_rps(dev_priv); } else if (IS_VALLEYVIEW(dev_priv)) { valleyview_enable_rps(dev_priv); } else if (INTEL_GEN(dev_priv) >= 9) { - gen9_enable_rc6(dev_priv); gen9_enable_rps(dev_priv); - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) - gen6_update_ring_freq(dev_priv); } else if (IS_BROADWELL(dev_priv)) { gen8_enable_rps(dev_priv); - gen6_update_ring_freq(dev_priv); } else if (INTEL_GEN(dev_priv) >= 6) { gen6_enable_rps(dev_priv); - gen6_update_ring_freq(dev_priv); } else if (IS_IRONLAKE_M(dev_priv)) { ironlake_enable_drps(dev_priv); intel_init_emon(dev_priv); } - WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq); - WARN_ON(dev_priv->rps.idle_freq > dev_priv->rps.max_freq); + WARN_ON(rps->max_freq < rps->min_freq); + WARN_ON(rps->idle_freq > rps->max_freq); - WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq); - WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq); + WARN_ON(rps->efficient_freq < rps->min_freq); + WARN_ON(rps->efficient_freq > rps->max_freq); - dev_priv->rps.enabled = true; - mutex_unlock(&dev_priv->rps.hw_lock); + rps->enabled = true; +} + +void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) +{ + /* Powersaving is controlled by the host when inside a VM */ + if (intel_vgpu_active(dev_priv)) + return; + + mutex_lock(&dev_priv->pcu_lock); + + intel_enable_rc6(dev_priv); + intel_enable_rps(dev_priv); + if (HAS_LLC(dev_priv)) + intel_enable_llc_pstate(dev_priv); + + mutex_unlock(&dev_priv->pcu_lock); } static void __intel_autoenable_gt_powersave(struct work_struct *work) { struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), rps.autoenable_work.work); + container_of(work, + typeof(*dev_priv), + gt_pm.autoenable_work.work); struct intel_engine_cs *rcs; struct drm_i915_gem_request *req; - if (READ_ONCE(dev_priv->rps.enabled)) - goto out; - rcs = dev_priv->engine[RCS]; if (rcs->last_retired_context) goto out; @@ -8018,9 +8153,6 @@ out: void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv) { - if (READ_ONCE(dev_priv->rps.enabled)) - return; - if (IS_IRONLAKE_M(dev_priv)) { ironlake_enable_drps(dev_priv); intel_init_emon(dev_priv); @@ -8038,7 +8170,7 @@ void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv) * runtime resume it's necessary). */ if (queue_delayed_work(dev_priv->wq, - &dev_priv->rps.autoenable_work, + &dev_priv->gt_pm.autoenable_work, round_jiffies_up_relative(HZ))) intel_runtime_pm_get_noresume(dev_priv); } @@ -8447,6 +8579,9 @@ static void skl_init_clock_gating(struct drm_i915_private *dev_priv) static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) { + /* The GTT cache must be disabled if the system is using 2M pages. */ + bool can_use_gtt_cache = !HAS_PAGE_SIZES(dev_priv, + I915_GTT_PAGE_SIZE_2M); enum pipe pipe; ilk_init_lp_watermarks(dev_priv); @@ -8481,12 +8616,8 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) /* WaProgramL3SqcReg1Default:bdw */ gen8_set_l3sqc_credits(dev_priv, 30, 2); - /* - * WaGttCachingOffByDefault:bdw - * GTT cache may not work with big pages, so if those - * are ever enabled GTT cache may need to be disabled. - */ - I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL); + /* WaGttCachingOffByDefault:bdw */ + I915_WRITE(HSW_GTT_CACHE_EN, can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0); /* WaKVMNotificationOnConfigChange:bdw */ I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1) @@ -9067,7 +9198,7 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val { int status; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); /* GEN6_PCODE_* are outside of the forcewake domain, we can * use te fw I915_READ variants to reduce the amount of work @@ -9114,7 +9245,7 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, { int status; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); /* GEN6_PCODE_* are outside of the forcewake domain, we can * use te fw I915_READ variants to reduce the amount of work @@ -9191,7 +9322,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, u32 status; int ret; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); #define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \ &status) @@ -9233,31 +9364,39 @@ out: static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* * N = val - 0xb7 * Slow = Fast = GPLL ref * N */ - return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * (val - 0xb7), 1000); + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000); } static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val) { - return DIV_ROUND_CLOSEST(1000 * val, dev_priv->rps.gpll_ref_freq) + 0xb7; + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7; } static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* * N = val / 2 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2 */ - return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * val, 2 * 2 * 1000); + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000); } static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* CHV needs even values */ - return DIV_ROUND_CLOSEST(2 * 1000 * val, dev_priv->rps.gpll_ref_freq) * 2; + return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2; } int intel_gpu_freq(struct drm_i915_private *dev_priv, int val) @@ -9288,14 +9427,14 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val) void intel_pm_setup(struct drm_i915_private *dev_priv) { - mutex_init(&dev_priv->rps.hw_lock); + mutex_init(&dev_priv->pcu_lock); - INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work, + INIT_DELAYED_WORK(&dev_priv->gt_pm.autoenable_work, __intel_autoenable_gt_powersave); - atomic_set(&dev_priv->rps.num_waiters, 0); + atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0); - dev_priv->pm.suspended = false; - atomic_set(&dev_priv->pm.wakeref_count, 0); + dev_priv->runtime_pm.suspended = false; + atomic_set(&dev_priv->runtime_pm.wakeref_count, 0); } static u64 vlv_residency_raw(struct drm_i915_private *dev_priv, @@ -9348,7 +9487,7 @@ u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, { u64 time_hw, units, div; - if (!intel_enable_rc6()) + if (!intel_rc6_enabled()) return 0; intel_runtime_pm_get(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 05c08b0bc172..4285f09ff8b8 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -579,7 +579,16 @@ out: static void reset_ring_common(struct intel_engine_cs *engine, struct drm_i915_gem_request *request) { - /* Try to restore the logical GPU state to match the continuation + /* + * RC6 must be prevented until the reset is complete and the engine + * reinitialised. If it occurs in the middle of this sequence, the + * state written to/loaded from the power context is ill-defined (e.g. + * the PP_BASE_DIR may be lost). + */ + assert_forcewakes_active(engine->i915, FORCEWAKE_ALL); + + /* + * Try to restore the logical GPU state to match the continuation * of the request queue. If we skip the context/PD restore, then * the next request may try to execute assuming that its context * is valid and loaded on the GPU and so may try to access invalid diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 56d7ae9f298b..17186f067408 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -7,6 +7,8 @@ #include "i915_gem_timeline.h" #include "i915_selftest.h" +struct drm_printer; + #define I915_CMD_HASH_ORDER 9 /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, @@ -238,6 +240,11 @@ struct intel_engine_execlists { #define EXECLIST_MAX_PORTS 2 } port[EXECLIST_MAX_PORTS]; + /** + * @preempt: are we currently handling a preempting context switch? + */ + bool preempt; + /** * @port_mask: number of execlist ports - 1 */ @@ -834,4 +841,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915); bool intel_engine_can_store_dword(struct intel_engine_cs *engine); +void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *p); + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 7933d1bc6a1c..8af286c63d3b 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -187,7 +187,7 @@ bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, struct i915_power_well *power_well; bool is_enabled; - if (dev_priv->pm.suspended) + if (dev_priv->runtime_pm.suspended) return false; is_enabled = true; @@ -368,7 +368,7 @@ static void hsw_power_well_enable(struct drm_i915_private *dev_priv, { enum i915_power_well_id id = power_well->id; bool wait_fuses = power_well->hsw.has_fuses; - enum skl_power_gate pg; + enum skl_power_gate uninitialized_var(pg); u32 val; if (wait_fuses) { @@ -785,7 +785,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv, state = enable ? PUNIT_PWRGT_PWR_ON(power_well_id) : PUNIT_PWRGT_PWR_GATE(power_well_id); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); #define COND \ ((vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask) == state) @@ -806,7 +806,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv, #undef COND out: - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } static void vlv_power_well_enable(struct drm_i915_private *dev_priv, @@ -833,7 +833,7 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, mask = PUNIT_PWRGT_MASK(power_well_id); ctrl = PUNIT_PWRGT_PWR_ON(power_well_id); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask; /* @@ -852,7 +852,7 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL) & mask; WARN_ON(ctrl != state); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); return enabled; } @@ -1364,7 +1364,7 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, bool enabled; u32 state, ctrl; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); state = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe); /* @@ -1381,7 +1381,7 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSC_MASK(pipe); WARN_ON(ctrl << 16 != state); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); return enabled; } @@ -1396,7 +1396,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); #define COND \ ((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe)) == state) @@ -1417,7 +1417,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, #undef COND out: - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv, @@ -2809,6 +2809,9 @@ static void cnl_display_core_init(struct drm_i915_private *dev_priv, bool resume /* 6. Enable DBUF */ gen9_dbuf_enable(dev_priv); + + if (resume && dev_priv->csr.dmc_payload) + intel_csr_load_program(dev_priv); } static void cnl_display_core_uninit(struct drm_i915_private *dev_priv) @@ -3125,7 +3128,7 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv) ret = pm_runtime_get_sync(kdev); WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret); - atomic_inc(&dev_priv->pm.wakeref_count); + atomic_inc(&dev_priv->runtime_pm.wakeref_count); assert_rpm_wakelock_held(dev_priv); } @@ -3159,7 +3162,7 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv) return false; } - atomic_inc(&dev_priv->pm.wakeref_count); + atomic_inc(&dev_priv->runtime_pm.wakeref_count); assert_rpm_wakelock_held(dev_priv); return true; @@ -3190,7 +3193,7 @@ void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv) assert_rpm_wakelock_held(dev_priv); pm_runtime_get_noresume(kdev); - atomic_inc(&dev_priv->pm.wakeref_count); + atomic_inc(&dev_priv->runtime_pm.wakeref_count); } /** @@ -3207,7 +3210,7 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv) struct device *kdev = &pdev->dev; assert_rpm_wakelock_held(dev_priv); - atomic_dec(&dev_priv->pm.wakeref_count); + atomic_dec(&dev_priv->runtime_pm.wakeref_count); pm_runtime_mark_last_busy(kdev); pm_runtime_put_autosuspend(kdev); diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 7d971cb56116..75c872bb8cc9 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -81,7 +81,7 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr) { u32 val = 0; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); mutex_lock(&dev_priv->sb_lock); vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, @@ -95,7 +95,7 @@ int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val) { int err; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); mutex_lock(&dev_priv->sb_lock); err = vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, @@ -125,7 +125,7 @@ u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr) { u32 val = 0; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); mutex_lock(&dev_priv->sb_lock); vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_NC, diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 28a1209d87e2..86fc9b529f2d 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -66,7 +66,13 @@ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, 1000 * adjusted_mode->crtc_htotal); } +/* FIXME: We should instead only take spinlocks once for the entire update + * instead of once per mmio. */ +#if IS_ENABLED(CONFIG_PROVE_LOCKING) +#define VBLANK_EVASION_TIME_US 250 +#else #define VBLANK_EVASION_TIME_US 100 +#endif /** * intel_pipe_update_start() - start update of a set of display registers diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 277477890240..7b938e822fde 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -22,22 +22,9 @@ * */ -#include "i915_drv.h" #include "intel_uc.h" -#include - -/* Cleans up uC firmware by releasing the firmware GEM obj. - */ -static void __intel_uc_fw_fini(struct intel_uc_fw *uc_fw) -{ - struct drm_i915_gem_object *obj; - - obj = fetch_and_zero(&uc_fw->obj); - if (obj) - i915_gem_object_put(obj); - - uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; -} +#include "i915_drv.h" +#include "i915_guc_submission.h" /* Reset GuC providing us with fresh state for both GuC and HuC. */ @@ -94,198 +81,34 @@ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv) i915_modparams.enable_guc_submission = HAS_GUC_SCHED(dev_priv); } -static void gen8_guc_raise_irq(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - - I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); -} - void intel_uc_init_early(struct drm_i915_private *dev_priv) { - struct intel_guc *guc = &dev_priv->guc; - - intel_guc_ct_init_early(&guc->ct); - - mutex_init(&guc->send_mutex); - guc->send = intel_guc_send_nop; - guc->notify = gen8_guc_raise_irq; -} - -static void fetch_uc_fw(struct drm_i915_private *dev_priv, - struct intel_uc_fw *uc_fw) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - struct drm_i915_gem_object *obj; - const struct firmware *fw = NULL; - struct uc_css_header *css; - size_t size; - int err; - - if (!uc_fw->path) - return; - - uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; - - DRM_DEBUG_DRIVER("before requesting firmware: uC fw fetch status %s\n", - intel_uc_fw_status_repr(uc_fw->fetch_status)); - - err = request_firmware(&fw, uc_fw->path, &pdev->dev); - if (err) - goto fail; - if (!fw) - goto fail; - - DRM_DEBUG_DRIVER("fetch uC fw from %s succeeded, fw %p\n", - uc_fw->path, fw); - - /* Check the size of the blob before examining buffer contents */ - if (fw->size < sizeof(struct uc_css_header)) { - DRM_NOTE("Firmware header is missing\n"); - goto fail; - } - - css = (struct uc_css_header *)fw->data; - - /* Firmware bits always start from header */ - uc_fw->header_offset = 0; - uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw - - css->key_size_dw - css->exponent_size_dw) * sizeof(u32); - - if (uc_fw->header_size != sizeof(struct uc_css_header)) { - DRM_NOTE("CSS header definition mismatch\n"); - goto fail; - } - - /* then, uCode */ - uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size; - uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); - - /* now RSA */ - if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { - DRM_NOTE("RSA key size is bad\n"); - goto fail; - } - uc_fw->rsa_offset = uc_fw->ucode_offset + uc_fw->ucode_size; - uc_fw->rsa_size = css->key_size_dw * sizeof(u32); - - /* At least, it should have header, uCode and RSA. Size of all three. */ - size = uc_fw->header_size + uc_fw->ucode_size + uc_fw->rsa_size; - if (fw->size < size) { - DRM_NOTE("Missing firmware components\n"); - goto fail; - } - - /* - * The GuC firmware image has the version number embedded at a - * well-known offset within the firmware blob; note that major / minor - * version are TWO bytes each (i.e. u16), although all pointers and - * offsets are defined in terms of bytes (u8). - */ - switch (uc_fw->type) { - case INTEL_UC_FW_TYPE_GUC: - /* Header and uCode will be loaded to WOPCM. Size of the two. */ - size = uc_fw->header_size + uc_fw->ucode_size; - - /* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). */ - if (size > intel_guc_wopcm_size(dev_priv)) { - DRM_ERROR("Firmware is too large to fit in WOPCM\n"); - goto fail; - } - uc_fw->major_ver_found = css->guc.sw_version >> 16; - uc_fw->minor_ver_found = css->guc.sw_version & 0xFFFF; - break; - - case INTEL_UC_FW_TYPE_HUC: - uc_fw->major_ver_found = css->huc.sw_version >> 16; - uc_fw->minor_ver_found = css->huc.sw_version & 0xFFFF; - break; - - default: - DRM_ERROR("Unknown firmware type %d\n", uc_fw->type); - err = -ENOEXEC; - goto fail; - } - - if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) { - DRM_NOTE("Skipping %s firmware version check\n", - intel_uc_fw_type_repr(uc_fw->type)); - } else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || - uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { - DRM_NOTE("%s firmware version %d.%d, required %d.%d\n", - intel_uc_fw_type_repr(uc_fw->type), - uc_fw->major_ver_found, uc_fw->minor_ver_found, - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); - err = -ENOEXEC; - goto fail; - } - - DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n", - uc_fw->major_ver_found, uc_fw->minor_ver_found, - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); - - obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto fail; - } - - uc_fw->obj = obj; - uc_fw->size = fw->size; - - DRM_DEBUG_DRIVER("uC fw fetch status SUCCESS, obj %p\n", - uc_fw->obj); - - release_firmware(fw); - uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS; - return; - -fail: - DRM_WARN("Failed to fetch valid uC firmware from %s (error %d)\n", - uc_fw->path, err); - DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n", - err, fw, uc_fw->obj); - - release_firmware(fw); /* OK even if fw is NULL */ - uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; + intel_guc_init_early(&dev_priv->guc); } void intel_uc_init_fw(struct drm_i915_private *dev_priv) { - fetch_uc_fw(dev_priv, &dev_priv->huc.fw); - fetch_uc_fw(dev_priv, &dev_priv->guc.fw); + intel_uc_fw_fetch(dev_priv, &dev_priv->huc.fw); + intel_uc_fw_fetch(dev_priv, &dev_priv->guc.fw); } void intel_uc_fini_fw(struct drm_i915_private *dev_priv) { - __intel_uc_fw_fini(&dev_priv->guc.fw); - __intel_uc_fw_fini(&dev_priv->huc.fw); + intel_uc_fw_fini(&dev_priv->guc.fw); + intel_uc_fw_fini(&dev_priv->huc.fw); } -static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i) +/** + * intel_uc_init_mmio - setup uC MMIO access + * + * @dev_priv: device private + * + * Setup minimal state necessary for MMIO accesses later in the + * initialization sequence. + */ +void intel_uc_init_mmio(struct drm_i915_private *dev_priv) { - GEM_BUG_ON(!guc->send_regs.base); - GEM_BUG_ON(!guc->send_regs.count); - GEM_BUG_ON(i >= guc->send_regs.count); - - return _MMIO(guc->send_regs.base + 4 * i); -} - -static void guc_init_send_regs(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - enum forcewake_domains fw_domains = 0; - unsigned int i; - - guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0)); - guc->send_regs.count = SOFT_SCRATCH_COUNT - 1; - - for (i = 0; i < guc->send_regs.count; i++) { - fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, - guc_send_reg(guc, i), - FW_REG_READ | FW_REG_WRITE); - } - guc->send_regs.fw_domains = fw_domains; + intel_guc_init_send_regs(&dev_priv->guc); } static void guc_capture_load_err_log(struct intel_guc *guc) @@ -309,8 +132,6 @@ static int guc_enable_communication(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); - guc_init_send_regs(guc); - if (HAS_GUC_CT(dev_priv)) return intel_guc_enable_ct(guc); @@ -328,27 +149,6 @@ static void guc_disable_communication(struct intel_guc *guc) guc->send = intel_guc_send_nop; } -/** - * intel_guc_auth_huc() - Send action to GuC to authenticate HuC ucode - * @guc: intel_guc structure - * @rsa_offset: rsa offset w.r.t ggtt base of huc vma - * - * Triggers a HuC firmware authentication request to the GuC via intel_guc_send - * INTEL_GUC_ACTION_AUTHENTICATE_HUC interface. This function is invoked by - * intel_huc_auth(). - * - * Return: non-zero code on error - */ -int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset) -{ - u32 action[] = { - INTEL_GUC_ACTION_AUTHENTICATE_HUC, - rsa_offset - }; - - return intel_guc_send(guc, action, ARRAY_SIZE(action)); -} - int intel_uc_init_hw(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; @@ -480,82 +280,3 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv) i915_ggtt_disable_guc(dev_priv); } - -int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len) -{ - WARN(1, "Unexpected send: action=%#x\n", *action); - return -ENODEV; -} - -/* - * This function implements the MMIO based host to GuC interface. - */ -int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - u32 status; - int i; - int ret; - - GEM_BUG_ON(!len); - GEM_BUG_ON(len > guc->send_regs.count); - - /* If CT is available, we expect to use MMIO only during init/fini */ - GEM_BUG_ON(HAS_GUC_CT(dev_priv) && - *action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER && - *action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER); - - mutex_lock(&guc->send_mutex); - intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains); - - for (i = 0; i < len; i++) - I915_WRITE(guc_send_reg(guc, i), action[i]); - - POSTING_READ(guc_send_reg(guc, i - 1)); - - intel_guc_notify(guc); - - /* - * No GuC command should ever take longer than 10ms. - * Fast commands should still complete in 10us. - */ - ret = __intel_wait_for_register_fw(dev_priv, - guc_send_reg(guc, 0), - INTEL_GUC_RECV_MASK, - INTEL_GUC_RECV_MASK, - 10, 10, &status); - if (status != INTEL_GUC_STATUS_SUCCESS) { - /* - * Either the GuC explicitly returned an error (which - * we convert to -EIO here) or no response at all was - * received within the timeout limit (-ETIMEDOUT) - */ - if (ret != -ETIMEDOUT) - ret = -EIO; - - DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;" - " ret=%d status=0x%08X response=0x%08X\n", - action[0], ret, status, I915_READ(SOFT_SCRATCH(15))); - } - - intel_uncore_forcewake_put(dev_priv, guc->send_regs.fw_domains); - mutex_unlock(&guc->send_mutex); - - return ret; -} - -int intel_guc_sample_forcewake(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - u32 action[2]; - - action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE; - /* WaRsDisableCoarsePowerGating:skl,bxt */ - if (!intel_enable_rc6() || NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) - action[1] = 0; - else - /* bit 0 and 1 are for Render and Media domain separately */ - action[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA; - - return intel_guc_send(guc, action, ARRAY_SIZE(action)); -} diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 6966349ed737..e18d3bb02088 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -24,237 +24,15 @@ #ifndef _INTEL_UC_H_ #define _INTEL_UC_H_ -#include "intel_guc_fwif.h" -#include "i915_guc_reg.h" -#include "intel_ringbuffer.h" -#include "intel_guc_ct.h" -#include "i915_vma.h" +#include "intel_guc.h" +#include "intel_huc.h" -struct drm_i915_gem_request; - -/* - * This structure primarily describes the GEM object shared with the GuC. - * The specs sometimes refer to this object as a "GuC context", but we use - * the term "client" to avoid confusion with hardware contexts. This - * GEM object is held for the entire lifetime of our interaction with - * the GuC, being allocated before the GuC is loaded with its firmware. - * Because there's no way to update the address used by the GuC after - * initialisation, the shared object must stay pinned into the GGTT as - * long as the GuC is in use. We also keep the first page (only) mapped - * into kernel address space, as it includes shared data that must be - * updated on every request submission. - * - * The single GEM object described here is actually made up of several - * separate areas, as far as the GuC is concerned. The first page (kept - * kmap'd) includes the "process descriptor" which holds sequence data for - * the doorbell, and one cacheline which actually *is* the doorbell; a - * write to this will "ring the doorbell" (i.e. send an interrupt to the - * GuC). The subsequent pages of the client object constitute the work - * queue (a circular array of work items), again described in the process - * descriptor. Work queue pages are mapped momentarily as required. - */ -struct i915_guc_client { - struct i915_vma *vma; - void *vaddr; - struct i915_gem_context *owner; - struct intel_guc *guc; - - uint32_t engines; /* bitmap of (host) engine ids */ - uint32_t priority; - u32 stage_id; - uint32_t proc_desc_offset; - - u16 doorbell_id; - unsigned long doorbell_offset; - - spinlock_t wq_lock; - /* Per-engine counts of GuC submissions */ - uint64_t submissions[I915_NUM_ENGINES]; -}; - -enum intel_uc_fw_status { - INTEL_UC_FIRMWARE_FAIL = -1, - INTEL_UC_FIRMWARE_NONE = 0, - INTEL_UC_FIRMWARE_PENDING, - INTEL_UC_FIRMWARE_SUCCESS -}; - -/* User-friendly representation of an enum */ -static inline -const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) -{ - switch (status) { - case INTEL_UC_FIRMWARE_FAIL: - return "FAIL"; - case INTEL_UC_FIRMWARE_NONE: - return "NONE"; - case INTEL_UC_FIRMWARE_PENDING: - return "PENDING"; - case INTEL_UC_FIRMWARE_SUCCESS: - return "SUCCESS"; - } - return ""; -} - -enum intel_uc_fw_type { - INTEL_UC_FW_TYPE_GUC, - INTEL_UC_FW_TYPE_HUC -}; - -/* User-friendly representation of an enum */ -static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) -{ - switch (type) { - case INTEL_UC_FW_TYPE_GUC: - return "GuC"; - case INTEL_UC_FW_TYPE_HUC: - return "HuC"; - } - return "uC"; -} - -/* - * This structure encapsulates all the data needed during the process - * of fetching, caching, and loading the firmware image into the GuC. - */ -struct intel_uc_fw { - const char *path; - size_t size; - struct drm_i915_gem_object *obj; - enum intel_uc_fw_status fetch_status; - enum intel_uc_fw_status load_status; - - uint16_t major_ver_wanted; - uint16_t minor_ver_wanted; - uint16_t major_ver_found; - uint16_t minor_ver_found; - - enum intel_uc_fw_type type; - uint32_t header_size; - uint32_t header_offset; - uint32_t rsa_size; - uint32_t rsa_offset; - uint32_t ucode_size; - uint32_t ucode_offset; -}; - -struct intel_guc_log { - uint32_t flags; - struct i915_vma *vma; - /* The runtime stuff gets created only when GuC logging gets enabled */ - struct { - void *buf_addr; - struct workqueue_struct *flush_wq; - struct work_struct flush_work; - struct rchan *relay_chan; - } runtime; - /* logging related stats */ - u32 capture_miss_count; - u32 flush_interrupt_count; - u32 prev_overflow_count[GUC_MAX_LOG_BUFFER]; - u32 total_overflow_count[GUC_MAX_LOG_BUFFER]; - u32 flush_count[GUC_MAX_LOG_BUFFER]; -}; - -struct intel_guc { - struct intel_uc_fw fw; - struct intel_guc_log log; - struct intel_guc_ct ct; - - /* Log snapshot if GuC errors during load */ - struct drm_i915_gem_object *load_err_log; - - /* intel_guc_recv interrupt related state */ - bool interrupts_enabled; - - struct i915_vma *ads_vma; - struct i915_vma *stage_desc_pool; - void *stage_desc_pool_vaddr; - struct ida stage_ids; - - struct i915_guc_client *execbuf_client; - - DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); - uint32_t db_cacheline; /* Cyclic counter mod pagesize */ - - /* GuC's FW specific registers used in MMIO send */ - struct { - u32 base; - unsigned int count; - enum forcewake_domains fw_domains; - } send_regs; - - /* To serialize the intel_guc_send actions */ - struct mutex send_mutex; - - /* GuC's FW specific send function */ - int (*send)(struct intel_guc *guc, const u32 *data, u32 len); - - /* GuC's FW specific notify function */ - void (*notify)(struct intel_guc *guc); -}; - -struct intel_huc { - /* Generic uC firmware management */ - struct intel_uc_fw fw; - - /* HuC-specific additions */ -}; - -/* intel_uc.c */ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv); void intel_uc_init_early(struct drm_i915_private *dev_priv); +void intel_uc_init_mmio(struct drm_i915_private *dev_priv); void intel_uc_init_fw(struct drm_i915_private *dev_priv); void intel_uc_fini_fw(struct drm_i915_private *dev_priv); int intel_uc_init_hw(struct drm_i915_private *dev_priv); void intel_uc_fini_hw(struct drm_i915_private *dev_priv); -int intel_guc_sample_forcewake(struct intel_guc *guc); -int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); -int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); -int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset); - -static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) -{ - return guc->send(guc, action, len); -} - -static inline void intel_guc_notify(struct intel_guc *guc) -{ - guc->notify(guc); -} - -/* intel_guc_loader.c */ -int intel_guc_select_fw(struct intel_guc *guc); -int intel_guc_init_hw(struct intel_guc *guc); -int intel_guc_suspend(struct drm_i915_private *dev_priv); -int intel_guc_resume(struct drm_i915_private *dev_priv); -u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); - -/* i915_guc_submission.c */ -int i915_guc_submission_init(struct drm_i915_private *dev_priv); -int i915_guc_submission_enable(struct drm_i915_private *dev_priv); -void i915_guc_submission_disable(struct drm_i915_private *dev_priv); -void i915_guc_submission_fini(struct drm_i915_private *dev_priv); -struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); - -/* intel_guc_log.c */ -int intel_guc_log_create(struct intel_guc *guc); -void intel_guc_log_destroy(struct intel_guc *guc); -int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val); -void i915_guc_log_register(struct drm_i915_private *dev_priv); -void i915_guc_log_unregister(struct drm_i915_private *dev_priv); - -static inline u32 guc_ggtt_offset(struct i915_vma *vma) -{ - u32 offset = i915_ggtt_offset(vma); - GEM_BUG_ON(offset < GUC_WOPCM_TOP); - GEM_BUG_ON(range_overflows_t(u64, offset, vma->size, GUC_GGTT_TOP)); - return offset; -} - -/* intel_huc.c */ -void intel_huc_select_fw(struct intel_huc *huc); -void intel_huc_init_hw(struct intel_huc *huc); -void intel_huc_auth(struct intel_huc *huc); #endif diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c new file mode 100644 index 000000000000..766b1cbdfbd7 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_uc_fw.c @@ -0,0 +1,193 @@ +/* + * Copyright © 2016-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include + +#include "intel_uc_fw.h" +#include "i915_drv.h" + +/** + * intel_uc_fw_fetch - fetch uC firmware + * + * @dev_priv: device private + * @uc_fw: uC firmware + * + * Fetch uC firmware into GEM obj. + */ +void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, + struct intel_uc_fw *uc_fw) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + struct drm_i915_gem_object *obj; + const struct firmware *fw = NULL; + struct uc_css_header *css; + size_t size; + int err; + + if (!uc_fw->path) + return; + + uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; + + DRM_DEBUG_DRIVER("before requesting firmware: uC fw fetch status %s\n", + intel_uc_fw_status_repr(uc_fw->fetch_status)); + + err = request_firmware(&fw, uc_fw->path, &pdev->dev); + if (err) + goto fail; + if (!fw) + goto fail; + + DRM_DEBUG_DRIVER("fetch uC fw from %s succeeded, fw %p\n", + uc_fw->path, fw); + + /* Check the size of the blob before examining buffer contents */ + if (fw->size < sizeof(struct uc_css_header)) { + DRM_NOTE("Firmware header is missing\n"); + goto fail; + } + + css = (struct uc_css_header *)fw->data; + + /* Firmware bits always start from header */ + uc_fw->header_offset = 0; + uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw - + css->key_size_dw - css->exponent_size_dw) * + sizeof(u32); + + if (uc_fw->header_size != sizeof(struct uc_css_header)) { + DRM_NOTE("CSS header definition mismatch\n"); + goto fail; + } + + /* then, uCode */ + uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size; + uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); + + /* now RSA */ + if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { + DRM_NOTE("RSA key size is bad\n"); + goto fail; + } + uc_fw->rsa_offset = uc_fw->ucode_offset + uc_fw->ucode_size; + uc_fw->rsa_size = css->key_size_dw * sizeof(u32); + + /* At least, it should have header, uCode and RSA. Size of all three. */ + size = uc_fw->header_size + uc_fw->ucode_size + uc_fw->rsa_size; + if (fw->size < size) { + DRM_NOTE("Missing firmware components\n"); + goto fail; + } + + /* + * The GuC firmware image has the version number embedded at a + * well-known offset within the firmware blob; note that major / minor + * version are TWO bytes each (i.e. u16), although all pointers and + * offsets are defined in terms of bytes (u8). + */ + switch (uc_fw->type) { + case INTEL_UC_FW_TYPE_GUC: + /* Header and uCode will be loaded to WOPCM. Size of the two. */ + size = uc_fw->header_size + uc_fw->ucode_size; + + /* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). */ + if (size > intel_guc_wopcm_size(dev_priv)) { + DRM_ERROR("Firmware is too large to fit in WOPCM\n"); + goto fail; + } + uc_fw->major_ver_found = css->guc.sw_version >> 16; + uc_fw->minor_ver_found = css->guc.sw_version & 0xFFFF; + break; + + case INTEL_UC_FW_TYPE_HUC: + uc_fw->major_ver_found = css->huc.sw_version >> 16; + uc_fw->minor_ver_found = css->huc.sw_version & 0xFFFF; + break; + + default: + DRM_ERROR("Unknown firmware type %d\n", uc_fw->type); + err = -ENOEXEC; + goto fail; + } + + if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) { + DRM_NOTE("Skipping %s firmware version check\n", + intel_uc_fw_type_repr(uc_fw->type)); + } else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || + uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { + DRM_NOTE("%s firmware version %d.%d, required %d.%d\n", + intel_uc_fw_type_repr(uc_fw->type), + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); + err = -ENOEXEC; + goto fail; + } + + DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n", + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); + + obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto fail; + } + + uc_fw->obj = obj; + uc_fw->size = fw->size; + + DRM_DEBUG_DRIVER("uC fw fetch status SUCCESS, obj %p\n", + uc_fw->obj); + + release_firmware(fw); + uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS; + return; + +fail: + DRM_WARN("Failed to fetch valid uC firmware from %s (error %d)\n", + uc_fw->path, err); + DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n", + err, fw, uc_fw->obj); + + release_firmware(fw); /* OK even if fw is NULL */ + uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; +} + +/** + * intel_uc_fw_fini - cleanup uC firmware + * + * @uc_fw: uC firmware + * + * Cleans up uC firmware by releasing the firmware GEM obj. + */ +void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) +{ + struct drm_i915_gem_object *obj; + + obj = fetch_and_zero(&uc_fw->obj); + if (obj) + i915_gem_object_put(obj); + + uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; +} diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h new file mode 100644 index 000000000000..c3e9af4b9bf0 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_uc_fw.h @@ -0,0 +1,107 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_UC_FW_H_ +#define _INTEL_UC_FW_H_ + +struct drm_i915_private; + +enum intel_uc_fw_status { + INTEL_UC_FIRMWARE_FAIL = -1, + INTEL_UC_FIRMWARE_NONE = 0, + INTEL_UC_FIRMWARE_PENDING, + INTEL_UC_FIRMWARE_SUCCESS +}; + +enum intel_uc_fw_type { + INTEL_UC_FW_TYPE_GUC, + INTEL_UC_FW_TYPE_HUC +}; + +/* + * This structure encapsulates all the data needed during the process + * of fetching, caching, and loading the firmware image into the uC. + */ +struct intel_uc_fw { + const char *path; + size_t size; + struct drm_i915_gem_object *obj; + enum intel_uc_fw_status fetch_status; + enum intel_uc_fw_status load_status; + + u16 major_ver_wanted; + u16 minor_ver_wanted; + u16 major_ver_found; + u16 minor_ver_found; + + enum intel_uc_fw_type type; + u32 header_size; + u32 header_offset; + u32 rsa_size; + u32 rsa_offset; + u32 ucode_size; + u32 ucode_offset; +}; + +static inline +const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) +{ + switch (status) { + case INTEL_UC_FIRMWARE_FAIL: + return "FAIL"; + case INTEL_UC_FIRMWARE_NONE: + return "NONE"; + case INTEL_UC_FIRMWARE_PENDING: + return "PENDING"; + case INTEL_UC_FIRMWARE_SUCCESS: + return "SUCCESS"; + } + return ""; +} + +static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) +{ + switch (type) { + case INTEL_UC_FW_TYPE_GUC: + return "GuC"; + case INTEL_UC_FW_TYPE_HUC: + return "HuC"; + } + return "uC"; +} + +static inline +void intel_uc_fw_init(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type) +{ + uc_fw->path = NULL; + uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; + uc_fw->load_status = INTEL_UC_FIRMWARE_NONE; + uc_fw->type = type; +} + +void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, + struct intel_uc_fw *uc_fw); +void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); + +#endif diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index b3c3f94fc7e4..983617b5b338 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -626,7 +626,23 @@ void assert_forcewakes_inactive(struct drm_i915_private *dev_priv) if (!dev_priv->uncore.funcs.force_wake_get) return; - WARN_ON(dev_priv->uncore.fw_domains_active); + WARN(dev_priv->uncore.fw_domains_active, + "Expected all fw_domains to be inactive, but %08x are still on\n", + dev_priv->uncore.fw_domains_active); +} + +void assert_forcewakes_active(struct drm_i915_private *dev_priv, + enum forcewake_domains fw_domains) +{ + if (!dev_priv->uncore.funcs.force_wake_get) + return; + + assert_rpm_wakelock_held(dev_priv); + + fw_domains &= dev_priv->uncore.fw_domains; + WARN(fw_domains & ~dev_priv->uncore.fw_domains_active, + "Expected %08x fw_domains to be active, but %08x are off\n", + fw_domains, fw_domains & ~dev_priv->uncore.fw_domains_active); } /* We give fast paths for the really cool registers */ diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 03786f931905..582771251b57 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -25,6 +25,12 @@ #ifndef __INTEL_UNCORE_H__ #define __INTEL_UNCORE_H__ +#include +#include +#include + +#include "i915_reg.h" + struct drm_i915_private; enum forcewake_domain_id { @@ -131,6 +137,8 @@ void intel_uncore_resume_early(struct drm_i915_private *dev_priv); u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv); void assert_forcewakes_inactive(struct drm_i915_private *dev_priv); +void assert_forcewakes_active(struct drm_i915_private *dev_priv, + enum forcewake_domains fw_domains); const char *intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id); enum forcewake_domains diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c index c5c7e8efbdd3..a2632df39173 100644 --- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c @@ -37,8 +37,7 @@ static void huge_free_pages(struct drm_i915_gem_object *obj, kfree(pages); } -static struct sg_table * -huge_get_pages(struct drm_i915_gem_object *obj) +static int huge_get_pages(struct drm_i915_gem_object *obj) { #define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) const unsigned long nreal = obj->scratch / PAGE_SIZE; @@ -49,11 +48,11 @@ huge_get_pages(struct drm_i915_gem_object *obj) pages = kmalloc(sizeof(*pages), GFP); if (!pages) - return ERR_PTR(-ENOMEM); + return -ENOMEM; if (sg_alloc_table(pages, npages, GFP)) { kfree(pages); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } sg = pages->sgl; @@ -81,11 +80,14 @@ huge_get_pages(struct drm_i915_gem_object *obj) if (i915_gem_gtt_prepare_pages(obj, pages)) goto err; - return pages; + __i915_gem_object_set_pages(obj, pages, PAGE_SIZE); + + return 0; err: huge_free_pages(obj, pages); - return ERR_PTR(-ENOMEM); + + return -ENOMEM; #undef GFP } diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c new file mode 100644 index 000000000000..c53f8474113a --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -0,0 +1,1734 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include + +#include "mock_drm.h" + +static const unsigned int page_sizes[] = { + I915_GTT_PAGE_SIZE_2M, + I915_GTT_PAGE_SIZE_64K, + I915_GTT_PAGE_SIZE_4K, +}; + +static unsigned int get_largest_page_size(struct drm_i915_private *i915, + u64 rem) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) { + unsigned int page_size = page_sizes[i]; + + if (HAS_PAGE_SIZES(i915, page_size) && rem >= page_size) + return page_size; + } + + return 0; +} + +static void huge_pages_free_pages(struct sg_table *st) +{ + struct scatterlist *sg; + + for (sg = st->sgl; sg; sg = __sg_next(sg)) { + if (sg_page(sg)) + __free_pages(sg_page(sg), get_order(sg->length)); + } + + sg_free_table(st); + kfree(st); +} + +static int get_huge_pages(struct drm_i915_gem_object *obj) +{ +#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) + unsigned int page_mask = obj->mm.page_mask; + struct sg_table *st; + struct scatterlist *sg; + unsigned int sg_page_sizes; + u64 rem; + + st = kmalloc(sizeof(*st), GFP); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) { + kfree(st); + return -ENOMEM; + } + + rem = obj->base.size; + sg = st->sgl; + st->nents = 0; + sg_page_sizes = 0; + + /* + * Our goal here is simple, we want to greedily fill the object from + * largest to smallest page-size, while ensuring that we use *every* + * page-size as per the given page-mask. + */ + do { + unsigned int bit = ilog2(page_mask); + unsigned int page_size = BIT(bit); + int order = get_order(page_size); + + do { + struct page *page; + + GEM_BUG_ON(order >= MAX_ORDER); + page = alloc_pages(GFP | __GFP_ZERO, order); + if (!page) + goto err; + + sg_set_page(sg, page, page_size, 0); + sg_page_sizes |= page_size; + st->nents++; + + rem -= page_size; + if (!rem) { + sg_mark_end(sg); + break; + } + + sg = __sg_next(sg); + } while ((rem - ((page_size-1) & page_mask)) >= page_size); + + page_mask &= (page_size-1); + } while (page_mask); + + if (i915_gem_gtt_prepare_pages(obj, st)) + goto err; + + obj->mm.madv = I915_MADV_DONTNEED; + + GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask); + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; + +err: + sg_set_page(sg, NULL, 0, 0); + sg_mark_end(sg); + huge_pages_free_pages(st); + + return -ENOMEM; +} + +static void put_huge_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + i915_gem_gtt_finish_pages(obj, pages); + huge_pages_free_pages(pages); + + obj->mm.dirty = false; + obj->mm.madv = I915_MADV_WILLNEED; +} + +static const struct drm_i915_gem_object_ops huge_page_ops = { + .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = get_huge_pages, + .put_pages = put_huge_pages, +}; + +static struct drm_i915_gem_object * +huge_pages_object(struct drm_i915_private *i915, + u64 size, + unsigned int page_mask) +{ + struct drm_i915_gem_object *obj; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask)))); + + if (size >> PAGE_SHIFT > INT_MAX) + return ERR_PTR(-E2BIG); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(i915); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &huge_page_ops); + + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = I915_CACHE_NONE; + + obj->mm.page_mask = page_mask; + + return obj; +} + +static int fake_get_huge_pages(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + const u64 max_len = rounddown_pow_of_two(UINT_MAX); + struct sg_table *st; + struct scatterlist *sg; + unsigned int sg_page_sizes; + u64 rem; + + st = kmalloc(sizeof(*st), GFP); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) { + kfree(st); + return -ENOMEM; + } + + /* Use optimal page sized chunks to fill in the sg table */ + rem = obj->base.size; + sg = st->sgl; + st->nents = 0; + sg_page_sizes = 0; + do { + unsigned int page_size = get_largest_page_size(i915, rem); + unsigned int len = min(page_size * div_u64(rem, page_size), + max_len); + + GEM_BUG_ON(!page_size); + + sg->offset = 0; + sg->length = len; + sg_dma_len(sg) = len; + sg_dma_address(sg) = page_size; + + sg_page_sizes |= len; + + st->nents++; + + rem -= len; + if (!rem) { + sg_mark_end(sg); + break; + } + + sg = sg_next(sg); + } while (1); + + obj->mm.madv = I915_MADV_DONTNEED; + + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; +} + +static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct sg_table *st; + struct scatterlist *sg; + unsigned int page_size; + + st = kmalloc(sizeof(*st), GFP); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, 1, GFP)) { + kfree(st); + return -ENOMEM; + } + + sg = st->sgl; + st->nents = 1; + + page_size = get_largest_page_size(i915, obj->base.size); + GEM_BUG_ON(!page_size); + + sg->offset = 0; + sg->length = obj->base.size; + sg_dma_len(sg) = obj->base.size; + sg_dma_address(sg) = page_size; + + obj->mm.madv = I915_MADV_DONTNEED; + + __i915_gem_object_set_pages(obj, st, sg->length); + + return 0; +#undef GFP +} + +static void fake_free_huge_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + sg_free_table(pages); + kfree(pages); +} + +static void fake_put_huge_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + fake_free_huge_pages(obj, pages); + obj->mm.dirty = false; + obj->mm.madv = I915_MADV_WILLNEED; +} + +static const struct drm_i915_gem_object_ops fake_ops = { + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = fake_get_huge_pages, + .put_pages = fake_put_huge_pages, +}; + +static const struct drm_i915_gem_object_ops fake_ops_single = { + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = fake_get_huge_pages_single, + .put_pages = fake_put_huge_pages, +}; + +static struct drm_i915_gem_object * +fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) +{ + struct drm_i915_gem_object *obj; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); + + if (size >> PAGE_SHIFT > UINT_MAX) + return ERR_PTR(-E2BIG); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(i915); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + + if (single) + i915_gem_object_init(obj, &fake_ops_single); + else + i915_gem_object_init(obj, &fake_ops); + + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = I915_CACHE_NONE; + + return obj; +} + +static int igt_check_page_sizes(struct i915_vma *vma) +{ + struct drm_i915_private *i915 = to_i915(vma->obj->base.dev); + unsigned int supported = INTEL_INFO(i915)->page_sizes; + struct drm_i915_gem_object *obj = vma->obj; + int err = 0; + + if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) { + pr_err("unsupported page_sizes.sg=%u, supported=%u\n", + vma->page_sizes.sg & ~supported, supported); + err = -EINVAL; + } + + if (!HAS_PAGE_SIZES(i915, vma->page_sizes.gtt)) { + pr_err("unsupported page_sizes.gtt=%u, supported=%u\n", + vma->page_sizes.gtt & ~supported, supported); + err = -EINVAL; + } + + if (vma->page_sizes.phys != obj->mm.page_sizes.phys) { + pr_err("vma->page_sizes.phys(%u) != obj->mm.page_sizes.phys(%u)\n", + vma->page_sizes.phys, obj->mm.page_sizes.phys); + err = -EINVAL; + } + + if (vma->page_sizes.sg != obj->mm.page_sizes.sg) { + pr_err("vma->page_sizes.sg(%u) != obj->mm.page_sizes.sg(%u)\n", + vma->page_sizes.sg, obj->mm.page_sizes.sg); + err = -EINVAL; + } + + if (obj->mm.page_sizes.gtt) { + pr_err("obj->page_sizes.gtt(%u) should never be set\n", + obj->mm.page_sizes.gtt); + err = -EINVAL; + } + + return err; +} + +static int igt_mock_exhaust_device_supported_pages(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->base.i915; + unsigned int saved_mask = INTEL_INFO(i915)->page_sizes; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int i, j, single; + int err; + + /* + * Sanity check creating objects with every valid page support + * combination for our mock device. + */ + + for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) { + unsigned int combination = 0; + + for (j = 0; j < ARRAY_SIZE(page_sizes); j++) { + if (i & BIT(j)) + combination |= page_sizes[j]; + } + + mkwrite_device_info(i915)->page_sizes = combination; + + for (single = 0; single <= 1; ++single) { + obj = fake_huge_pages_object(i915, combination, !!single); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_device; + } + + if (obj->base.size != combination) { + pr_err("obj->base.size=%zu, expected=%u\n", + obj->base.size, combination); + err = -EINVAL; + goto out_put; + } + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_close; + + err = igt_check_page_sizes(vma); + + if (vma->page_sizes.sg != combination) { + pr_err("page_sizes.sg=%u, expected=%u\n", + vma->page_sizes.sg, combination); + err = -EINVAL; + } + + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_gem_object_put(obj); + + if (err) + goto out_device; + } + } + + goto out_device; + +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); +out_device: + mkwrite_device_info(i915)->page_sizes = saved_mask; + + return err; +} + +static int igt_mock_ppgtt_misaligned_dma(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->base.i915; + unsigned long supported = INTEL_INFO(i915)->page_sizes; + struct drm_i915_gem_object *obj; + int bit; + int err; + + /* + * Sanity check dma misalignment for huge pages -- the dma addresses we + * insert into the paging structures need to always respect the page + * size alignment. + */ + + bit = ilog2(I915_GTT_PAGE_SIZE_64K); + + for_each_set_bit_from(bit, &supported, + ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { + IGT_TIMEOUT(end_time); + unsigned int page_size = BIT(bit); + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + unsigned int offset; + unsigned int size = + round_up(page_size, I915_GTT_PAGE_SIZE_2M) << 1; + struct i915_vma *vma; + + obj = fake_huge_pages_object(i915, size, true); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + if (obj->base.size != size) { + pr_err("obj->base.size=%zu, expected=%u\n", + obj->base.size, size); + err = -EINVAL; + goto out_put; + } + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + /* Force the page size for this object */ + obj->mm.page_sizes.sg = page_size; + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_unpin; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + + err = igt_check_page_sizes(vma); + + if (vma->page_sizes.gtt != page_size) { + pr_err("page_sizes.gtt=%u, expected %u\n", + vma->page_sizes.gtt, page_size); + err = -EINVAL; + } + + i915_vma_unpin(vma); + + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + /* + * Try all the other valid offsets until the next + * boundary -- should always fall back to using 4K + * pages. + */ + for (offset = 4096; offset < page_size; offset += 4096) { + err = i915_vma_unbind(vma); + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + err = i915_vma_pin(vma, 0, 0, flags | offset); + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + err = igt_check_page_sizes(vma); + + if (vma->page_sizes.gtt != I915_GTT_PAGE_SIZE_4K) { + pr_err("page_sizes.gtt=%u, expected %lu\n", + vma->page_sizes.gtt, I915_GTT_PAGE_SIZE_4K); + err = -EINVAL; + } + + i915_vma_unpin(vma); + + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + if (igt_timeout(end_time, + "%s timed out at offset %x with page-size %x\n", + __func__, offset, page_size)) + break; + } + + i915_vma_close(vma); + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static void close_object_list(struct list_head *objects, + struct i915_hw_ppgtt *ppgtt) +{ + struct drm_i915_gem_object *obj, *on; + + list_for_each_entry_safe(obj, on, objects, st_link) { + struct i915_vma *vma; + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (!IS_ERR(vma)) + i915_vma_close(vma); + + list_del(&obj->st_link); + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } +} + +static int igt_mock_ppgtt_huge_fill(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->base.i915; + unsigned long max_pages = ppgtt->base.total >> PAGE_SHIFT; + unsigned long page_num; + bool single = false; + LIST_HEAD(objects); + IGT_TIMEOUT(end_time); + int err; + + for_each_prime_number_from(page_num, 1, max_pages) { + struct drm_i915_gem_object *obj; + u64 size = page_num << PAGE_SHIFT; + struct i915_vma *vma; + unsigned int expected_gtt = 0; + int i; + + obj = fake_huge_pages_object(i915, size, single); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + break; + } + + if (obj->base.size != size) { + pr_err("obj->base.size=%zd, expected=%llu\n", + obj->base.size, size); + i915_gem_object_put(obj); + err = -EINVAL; + break; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + break; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + break; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + break; + + err = igt_check_page_sizes(vma); + if (err) { + i915_vma_unpin(vma); + break; + } + + /* + * Figure out the expected gtt page size knowing that we go from + * largest to smallest page size sg chunks, and that we align to + * the largest page size. + */ + for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) { + unsigned int page_size = page_sizes[i]; + + if (HAS_PAGE_SIZES(i915, page_size) && + size >= page_size) { + expected_gtt |= page_size; + size &= page_size-1; + } + } + + GEM_BUG_ON(!expected_gtt); + GEM_BUG_ON(size); + + if (expected_gtt & I915_GTT_PAGE_SIZE_4K) + expected_gtt &= ~I915_GTT_PAGE_SIZE_64K; + + i915_vma_unpin(vma); + + if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { + if (!IS_ALIGNED(vma->node.start, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.start(%llx) not aligned to 2M\n", + vma->node.start); + err = -EINVAL; + break; + } + + if (!IS_ALIGNED(vma->node.size, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.size(%llx) not aligned to 2M\n", + vma->node.size); + err = -EINVAL; + break; + } + } + + if (vma->page_sizes.gtt != expected_gtt) { + pr_err("gtt=%u, expected=%u, size=%zd, single=%s\n", + vma->page_sizes.gtt, expected_gtt, + obj->base.size, yesno(!!single)); + err = -EINVAL; + break; + } + + if (igt_timeout(end_time, + "%s timed out at size %zd\n", + __func__, obj->base.size)) + break; + + single = !single; + } + + close_object_list(&objects, ppgtt); + + if (err == -ENOMEM || err == -ENOSPC) + err = 0; + + return err; +} + +static int igt_mock_ppgtt_64K(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->base.i915; + struct drm_i915_gem_object *obj; + const struct object_info { + unsigned int size; + unsigned int gtt; + unsigned int offset; + } objects[] = { + /* Cases with forced padding/alignment */ + { + .size = SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + { + .size = SZ_64K + SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_64K - SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_2M, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + { + .size = SZ_2M - SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_2M + SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_2M + SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + { + .size = SZ_2M - SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + /* Try without any forced padding/alignment */ + { + .size = SZ_64K, + .offset = SZ_2M, + .gtt = I915_GTT_PAGE_SIZE_4K, + }, + { + .size = SZ_128K, + .offset = SZ_2M - SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_4K, + }, + }; + struct i915_vma *vma; + int i, single; + int err; + + /* + * Sanity check some of the trickiness with 64K pages -- either we can + * safely mark the whole page-table(2M block) as 64K, or we have to + * always fallback to 4K. + */ + + if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K)) + return 0; + + for (i = 0; i < ARRAY_SIZE(objects); ++i) { + unsigned int size = objects[i].size; + unsigned int expected_gtt = objects[i].gtt; + unsigned int offset = objects[i].offset; + unsigned int flags = PIN_USER; + + for (single = 0; single <= 1; single++) { + obj = fake_huge_pages_object(i915, size, !!single); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_object_put; + + /* + * Disable 2M pages -- We only want to use 64K/4K pages + * for this test. + */ + obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M; + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_object_unpin; + } + + if (offset) + flags |= PIN_OFFSET_FIXED | offset; + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_vma_close; + + err = igt_check_page_sizes(vma); + if (err) + goto out_vma_unpin; + + if (!offset && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { + if (!IS_ALIGNED(vma->node.start, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.start(%llx) not aligned to 2M\n", + vma->node.start); + err = -EINVAL; + goto out_vma_unpin; + } + + if (!IS_ALIGNED(vma->node.size, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.size(%llx) not aligned to 2M\n", + vma->node.size); + err = -EINVAL; + goto out_vma_unpin; + } + } + + if (vma->page_sizes.gtt != expected_gtt) { + pr_err("gtt=%u, expected=%u, i=%d, single=%s\n", + vma->page_sizes.gtt, expected_gtt, i, + yesno(!!single)); + err = -EINVAL; + goto out_vma_unpin; + } + + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + } + + return 0; + +out_vma_unpin: + i915_vma_unpin(vma); +out_vma_close: + i915_vma_close(vma); +out_object_unpin: + i915_gem_object_unpin_pages(obj); +out_object_put: + i915_gem_object_put(obj); + + return err; +} + +static struct i915_vma * +gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val) +{ + struct drm_i915_private *i915 = to_i915(vma->obj->base.dev); + const int gen = INTEL_GEN(vma->vm->i915); + unsigned int count = vma->size >> PAGE_SHIFT; + struct drm_i915_gem_object *obj; + struct i915_vma *batch; + unsigned int size; + u32 *cmd; + int n; + int err; + + size = (1 + 4 * count) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + offset += vma->node.start; + + for (n = 0; n < count; n++) { + if (gen >= 8) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = val; + } else if (gen >= 4) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4 | + (gen < 6 ? 1 << 22 : 0); + *cmd++ = 0; + *cmd++ = offset; + *cmd++ = val; + } else { + *cmd++ = MI_STORE_DWORD_IMM | 1 << 22; + *cmd++ = offset; + *cmd++ = val; + } + + offset += PAGE_SIZE; + } + + *cmd = MI_BATCH_BUFFER_END; + + i915_gem_object_unpin_map(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + goto err; + + batch = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err; + } + + err = i915_vma_pin(batch, 0, 0, PIN_USER); + if (err) + goto err; + + return batch; + +err: + i915_gem_object_put(obj); + + return ERR_PTR(err); +} + +static int gpu_write(struct i915_vma *vma, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + u32 dword, + u32 value) +{ + struct drm_i915_gem_request *rq; + struct i915_vma *batch; + int flags = 0; + int err; + + GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + + err = i915_gem_object_set_to_gtt_domain(vma->obj, true); + if (err) + return err; + + rq = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + batch = gpu_write_dw(vma, dword * sizeof(u32), value); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_request; + } + + i915_vma_move_to_active(batch, rq, 0); + i915_gem_object_set_active_reference(batch->obj); + i915_vma_unpin(batch); + i915_vma_close(batch); + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto err_request; + + err = i915_switch_context(rq); + if (err) + goto err_request; + + err = rq->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); + if (err) + goto err_request; + + i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + + reservation_object_lock(vma->resv, NULL); + reservation_object_add_excl_fence(vma->resv, &rq->fence); + reservation_object_unlock(vma->resv); + +err_request: + __i915_add_request(rq, err == 0); + + return err; +} + +static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) +{ + unsigned int needs_flush; + unsigned long n; + int err; + + err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush); + if (err) + return err; + + for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { + u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n)); + + if (needs_flush & CLFLUSH_BEFORE) + drm_clflush_virt_range(ptr, PAGE_SIZE); + + if (ptr[dword] != val) { + pr_err("n=%lu ptr[%u]=%u, val=%u\n", + n, dword, ptr[dword], val); + kunmap_atomic(ptr); + err = -EINVAL; + break; + } + + kunmap_atomic(ptr); + } + + i915_gem_obj_finish_shmem_access(obj); + + return err; +} + +static int igt_write_huge(struct i915_gem_context *ctx, + struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct intel_engine_cs *engine; + struct i915_vma *vma; + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + unsigned int max_page_size; + unsigned int id; + u64 max; + u64 num; + u64 size; + int err = 0; + + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + + size = obj->base.size; + if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) + size = round_up(size, I915_GTT_PAGE_SIZE_2M); + + max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); + max = div_u64((vm->total - size), max_page_size); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + + if (!intel_engine_can_store_dword(engine)) { + pr_info("store-dword-imm not supported on engine=%u\n", + id); + continue; + } + + /* + * Try various offsets until we timeout -- we want to avoid + * issues hidden by effectively always using offset = 0. + */ + for_each_prime_number_from(num, 0, max) { + u64 offset = num * max_page_size; + u32 dword; + + err = i915_vma_unbind(vma); + if (err) + goto out_vma_close; + + err = i915_vma_pin(vma, size, max_page_size, flags | offset); + if (err) { + /* + * The ggtt may have some pages reserved so + * refrain from erroring out. + */ + if (err == -ENOSPC && i915_is_ggtt(vm)) { + err = 0; + continue; + } + + goto out_vma_close; + } + + err = igt_check_page_sizes(vma); + if (err) + goto out_vma_unpin; + + dword = offset_in_page(num) / 4; + + err = gpu_write(vma, ctx, engine, dword, num + 1); + if (err) { + pr_err("gpu-write failed at offset=%llx", offset); + goto out_vma_unpin; + } + + err = cpu_check(obj, dword, num + 1); + if (err) { + pr_err("cpu-check failed at offset=%llx", offset); + goto out_vma_unpin; + } + + i915_vma_unpin(vma); + + if (num > 0 && + igt_timeout(end_time, + "%s timed out on engine=%u at offset=%llx, max_page_size=%x\n", + __func__, id, offset, max_page_size)) + break; + } + } + +out_vma_unpin: + if (i915_vma_is_pinned(vma)) + i915_vma_unpin(vma); +out_vma_close: + i915_vma_close(vma); + + return err; +} + +static int igt_ppgtt_exhaust_huge(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + unsigned long supported = INTEL_INFO(i915)->page_sizes; + static unsigned int pages[ARRAY_SIZE(page_sizes)]; + struct drm_i915_gem_object *obj; + unsigned int size_mask; + unsigned int page_mask; + int n, i; + int err; + + /* + * Sanity check creating objects with a varying mix of page sizes -- + * ensuring that our writes lands in the right place. + */ + + n = 0; + for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) + pages[n++] = BIT(i); + + for (size_mask = 2; size_mask < BIT(n); size_mask++) { + unsigned int size = 0; + + for (i = 0; i < n; i++) { + if (size_mask & BIT(i)) + size |= pages[i]; + } + + /* + * For our page mask we want to enumerate all the page-size + * combinations which will fit into our chosen object size. + */ + for (page_mask = 2; page_mask <= size_mask; page_mask++) { + unsigned int page_sizes = 0; + + for (i = 0; i < n; i++) { + if (page_mask & BIT(i)) + page_sizes |= pages[i]; + } + + /* + * Ensure that we can actually fill the given object + * with our chosen page mask. + */ + if (!IS_ALIGNED(size, BIT(__ffs(page_sizes)))) + continue; + + obj = huge_pages_object(i915, size, page_sizes); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_device; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + + if (err == -ENOMEM) { + pr_info("unable to get pages, size=%u, pages=%u\n", + size, page_sizes); + err = 0; + break; + } + + pr_err("pin_pages failed, size=%u, pages=%u\n", + size_mask, page_mask); + + goto out_device; + } + + /* Force the page-size for the gtt insertion */ + obj->mm.page_sizes.sg = page_sizes; + + err = igt_write_huge(ctx, obj); + if (err) { + pr_err("exhaust write-huge failed with size=%u\n", + size); + goto out_unpin; + } + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + } + + goto out_device; + +out_unpin: + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); +out_device: + mkwrite_device_info(i915)->page_sizes = supported; + + return err; +} + +static int igt_ppgtt_internal_huge(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_gem_object *obj; + static const unsigned int sizes[] = { + SZ_64K, + SZ_128K, + SZ_256K, + SZ_512K, + SZ_1M, + SZ_2M, + }; + int i; + int err; + + /* + * Sanity check that the HW uses huge pages correctly through internal + * -- ensure that our writes land in the right place. + */ + + for (i = 0; i < ARRAY_SIZE(sizes); ++i) { + unsigned int size = sizes[i]; + + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) { + pr_info("internal unable to allocate huge-page(s) with size=%u\n", + size); + goto out_unpin; + } + + err = igt_write_huge(ctx, obj); + if (err) { + pr_err("internal write-huge failed with size=%u\n", + size); + goto out_unpin; + } + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static inline bool igt_can_allocate_thp(struct drm_i915_private *i915) +{ + return i915->mm.gemfs && has_transparent_hugepage(); +} + +static int igt_ppgtt_gemfs_huge(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_gem_object *obj; + static const unsigned int sizes[] = { + SZ_2M, + SZ_4M, + SZ_8M, + SZ_16M, + SZ_32M, + }; + int i; + int err; + + /* + * Sanity check that the HW uses huge pages correctly through gemfs -- + * ensure that our writes land in the right place. + */ + + if (!igt_can_allocate_thp(i915)) { + pr_info("missing THP support, skipping\n"); + return 0; + } + + for (i = 0; i < ARRAY_SIZE(sizes); ++i) { + unsigned int size = sizes[i]; + + obj = i915_gem_object_create(i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { + pr_info("finishing test early, gemfs unable to allocate huge-page(s) with size=%u\n", + size); + goto out_unpin; + } + + err = igt_write_huge(ctx, obj); + if (err) { + pr_err("gemfs write-huge failed with size=%u\n", + size); + goto out_unpin; + } + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static int igt_ppgtt_pin_update(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *dev_priv = ctx->i915; + unsigned long supported = INTEL_INFO(dev_priv)->page_sizes; + struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + int first, last; + int err; + + /* + * Make sure there's no funny business when doing a PIN_UPDATE -- in the + * past we had a subtle issue with being able to incorrectly do multiple + * alloc va ranges on the same object when doing a PIN_UPDATE, which + * resulted in some pretty nasty bugs, though only when using + * huge-gtt-pages. + */ + + if (!USES_FULL_48BIT_PPGTT(dev_priv)) { + pr_info("48b PPGTT not supported, skipping\n"); + return 0; + } + + first = ilog2(I915_GTT_PAGE_SIZE_64K); + last = ilog2(I915_GTT_PAGE_SIZE_2M); + + for_each_set_bit_from(first, &supported, last + 1) { + unsigned int page_size = BIT(first); + + obj = i915_gem_object_create_internal(dev_priv, page_size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, SZ_2M, 0, flags); + if (err) + goto out_close; + + if (vma->page_sizes.sg < page_size) { + pr_info("Unable to allocate page-size %x, finishing test early\n", + page_size); + goto out_unpin; + } + + err = igt_check_page_sizes(vma); + if (err) + goto out_unpin; + + if (vma->page_sizes.gtt != page_size) { + dma_addr_t addr = i915_gem_object_get_dma_address(obj, 0); + + /* + * The only valid reason for this to ever fail would be + * if the dma-mapper screwed us over when we did the + * dma_map_sg(), since it has the final say over the dma + * address. + */ + if (IS_ALIGNED(addr, page_size)) { + pr_err("page_sizes.gtt=%u, expected=%u\n", + vma->page_sizes.gtt, page_size); + err = -EINVAL; + } else { + pr_info("dma address misaligned, finishing test early\n"); + } + + goto out_unpin; + } + + err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE); + if (err) + goto out_unpin; + + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_gem_object_put(obj); + } + + obj = i915_gem_object_create_internal(dev_priv, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_close; + + /* + * Make sure we don't end up with something like where the pde is still + * pointing to the 2M page, and the pt we just filled-in is dangling -- + * we can check this by writing to the first page where it would then + * land in the now stale 2M page. + */ + + err = gpu_write(vma, ctx, dev_priv->engine[RCS], 0, 0xdeadbeaf); + if (err) + goto out_unpin; + + err = cpu_check(obj, 0, 0xdeadbeaf); + +out_unpin: + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static int igt_tmpfs_fallback(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + struct vfsmount *gemfs = i915->mm.gemfs; + struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *vaddr; + int err = 0; + + /* + * Make sure that we don't burst into a ball of flames upon falling back + * to tmpfs, which we rely on if on the off-chance we encouter a failure + * when setting up gemfs. + */ + + i915->mm.gemfs = NULL; + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_restore; + } + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out_put; + } + *vaddr = 0xdeadbeaf; + + i915_gem_object_unpin_map(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_close; + + err = igt_check_page_sizes(vma); + + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); +out_restore: + i915->mm.gemfs = gemfs; + + return err; +} + +static int igt_shrink_thp(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int flags = PIN_USER; + int err; + + /* + * Sanity check shrinking huge-paged object -- make sure nothing blows + * up. + */ + + if (!igt_can_allocate_thp(i915)) { + pr_info("missing THP support, skipping\n"); + return 0; + } + + obj = i915_gem_object_create(i915, SZ_2M); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_close; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { + pr_info("failed to allocate THP, finishing test early\n"); + goto out_unpin; + } + + err = igt_check_page_sizes(vma); + if (err) + goto out_unpin; + + err = gpu_write(vma, ctx, i915->engine[RCS], 0, 0xdeadbeaf); + if (err) + goto out_unpin; + + i915_vma_unpin(vma); + + /* + * Now that the pages are *unpinned* shrink-all should invoke + * shmem to truncate our pages. + */ + i915_gem_shrink_all(i915); + if (!IS_ERR_OR_NULL(obj->mm.pages)) { + pr_err("shrink-all didn't truncate the pages\n"); + err = -EINVAL; + goto out_close; + } + + if (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys) { + pr_err("residual page-size bits left\n"); + err = -EINVAL; + goto out_close; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_close; + + err = cpu_check(obj, 0, 0xdeadbeaf); + +out_unpin: + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); + + return err; +} + +int i915_gem_huge_page_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_mock_exhaust_device_supported_pages), + SUBTEST(igt_mock_ppgtt_misaligned_dma), + SUBTEST(igt_mock_ppgtt_huge_fill), + SUBTEST(igt_mock_ppgtt_64K), + }; + int saved_ppgtt = i915_modparams.enable_ppgtt; + struct drm_i915_private *dev_priv; + struct pci_dev *pdev; + struct i915_hw_ppgtt *ppgtt; + int err; + + dev_priv = mock_gem_device(); + if (!dev_priv) + return -ENOMEM; + + /* Pretend to be a device which supports the 48b PPGTT */ + i915_modparams.enable_ppgtt = 3; + + pdev = dev_priv->drm.pdev; + dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(39)); + + mutex_lock(&dev_priv->drm.struct_mutex); + ppgtt = i915_ppgtt_create(dev_priv, ERR_PTR(-ENODEV), "mock"); + if (IS_ERR(ppgtt)) { + err = PTR_ERR(ppgtt); + goto out_unlock; + } + + if (!i915_vm_is_48bit(&ppgtt->base)) { + pr_err("failed to create 48b PPGTT\n"); + err = -EINVAL; + goto out_close; + } + + /* If we were ever hit this then it's time to mock the 64K scratch */ + if (!i915_vm_has_scratch_64K(&ppgtt->base)) { + pr_err("PPGTT missing 64K scratch page\n"); + err = -EINVAL; + goto out_close; + } + + err = i915_subtests(tests, ppgtt); + +out_close: + i915_ppgtt_close(&ppgtt->base); + i915_ppgtt_put(ppgtt); + +out_unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); + + i915_modparams.enable_ppgtt = saved_ppgtt; + + drm_dev_unref(&dev_priv->drm); + + return err; +} + +int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_shrink_thp), + SUBTEST(igt_ppgtt_pin_update), + SUBTEST(igt_tmpfs_fallback), + SUBTEST(igt_ppgtt_exhaust_huge), + SUBTEST(igt_ppgtt_gemfs_huge), + SUBTEST(igt_ppgtt_internal_huge), + }; + struct drm_file *file; + struct i915_gem_context *ctx; + int err; + + if (!USES_PPGTT(dev_priv)) { + pr_info("PPGTT not supported, skipping live-selftests\n"); + return 0; + } + + file = mock_file(dev_priv); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&dev_priv->drm.struct_mutex); + + ctx = live_context(dev_priv, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_unlock; + } + + err = i915_subtests(tests, ctx); + +out_unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); + + mock_file_free(dev_priv, file); + + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 6b132caffa18..9da0c9f99916 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -39,25 +39,26 @@ static void fake_free_pages(struct drm_i915_gem_object *obj, kfree(pages); } -static struct sg_table * -fake_get_pages(struct drm_i915_gem_object *obj) +static int fake_get_pages(struct drm_i915_gem_object *obj) { #define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) #define PFN_BIAS 0x1000 struct sg_table *pages; struct scatterlist *sg; + unsigned int sg_page_sizes; typeof(obj->base.size) rem; pages = kmalloc(sizeof(*pages), GFP); if (!pages) - return ERR_PTR(-ENOMEM); + return -ENOMEM; rem = round_up(obj->base.size, BIT(31)) >> 31; if (sg_alloc_table(pages, rem, GFP)) { kfree(pages); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } + sg_page_sizes = 0; rem = obj->base.size; for (sg = pages->sgl; sg; sg = sg_next(sg)) { unsigned long len = min_t(typeof(rem), rem, BIT(31)); @@ -66,13 +67,17 @@ fake_get_pages(struct drm_i915_gem_object *obj) sg_set_page(sg, pfn_to_page(PFN_BIAS), len, 0); sg_dma_address(sg) = page_to_phys(sg_page(sg)); sg_dma_len(sg) = len; + sg_page_sizes |= len; rem -= len; } GEM_BUG_ON(rem); obj->mm.madv = I915_MADV_DONTNEED; - return pages; + + __i915_gem_object_set_pages(obj, pages, sg_page_sizes); + + return 0; #undef GFP } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 8f011c447e41..1b8774a42e48 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -251,14 +251,6 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, return PTR_ERR(io); } - err = i915_vma_get_fence(vma); - if (err) { - pr_err("Failed to get fence for partial view: offset=%lu\n", - page); - i915_vma_unpin_iomap(vma); - return err; - } - iowrite32(page, io + n * PAGE_SIZE/sizeof(*io)); i915_vma_unpin_iomap(vma); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 6664cb2eb0b8..a999161e8db1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -215,7 +215,9 @@ static int igt_request_rewind(void *arg) } i915_gem_request_get(vip); i915_add_request(vip); + rcu_read_lock(); request->engine->submit_request(request); + rcu_read_unlock(); mutex_unlock(&i915->drm.struct_mutex); @@ -418,7 +420,10 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915) err = PTR_ERR(cmd); goto err; } + *cmd = MI_BATCH_BUFFER_END; + i915_gem_chipset_flush(i915); + i915_gem_object_unpin_map(obj); err = i915_gem_object_set_to_gtt_domain(obj, false); @@ -605,8 +610,8 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915) *cmd++ = lower_32_bits(vma->node.start); } *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ + i915_gem_chipset_flush(i915); - wmb(); i915_gem_object_unpin_map(obj); return vma; @@ -625,7 +630,7 @@ static int recursive_batch_resolve(struct i915_vma *batch) return PTR_ERR(cmd); *cmd = MI_BATCH_BUFFER_END; - wmb(); + i915_gem_chipset_flush(batch->vm->i915); i915_gem_object_unpin_map(batch->obj); @@ -858,7 +863,8 @@ out_request: I915_MAP_WC); if (!IS_ERR(cmd)) { *cmd = MI_BATCH_BUFFER_END; - wmb(); + i915_gem_chipset_flush(i915); + i915_gem_object_unpin_map(request[id]->batch->obj); } diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 18b174d855ca..64acd7eccc5c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -15,5 +15,6 @@ selftest(objects, i915_gem_object_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) selftest(gtt, i915_gem_gtt_live_selftests) +selftest(hugepages, i915_gem_huge_page_live_selftests) selftest(contexts, i915_gem_context_live_selftests) selftest(hangcheck, intel_hangcheck_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index fc74687501ba..9961b44f76ed 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -21,3 +21,4 @@ selftest(dmabuf, i915_gem_dmabuf_mock_selftests) selftest(vma, i915_vma_mock_selftests) selftest(evict, i915_gem_evict_mock_selftests) selftest(gtt, i915_gem_gtt_mock_selftests) +selftest(hugepages, i915_gem_huge_page_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 377c1de766ce..71ce06680d66 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -165,6 +165,7 @@ static int emit_recurse_batch(struct hang *h, *batch++ = lower_32_bits(vma->node.start); } *batch++ = MI_BATCH_BUFFER_END; /* not reached */ + i915_gem_chipset_flush(h->i915); flags = 0; if (INTEL_GEN(vm->i915) <= 5) @@ -231,7 +232,7 @@ static u32 hws_seqno(const struct hang *h, static void hang_fini(struct hang *h) { *h->batch = MI_BATCH_BUFFER_END; - wmb(); + i915_gem_chipset_flush(h->i915); i915_gem_object_unpin_map(h->obj); i915_gem_object_put(h->obj); @@ -275,6 +276,8 @@ static int igt_hang_sanitycheck(void *arg) i915_gem_request_get(rq); *h.batch = MI_BATCH_BUFFER_END; + i915_gem_chipset_flush(i915); + __i915_add_request(rq, true); timeout = i915_wait_request(rq, @@ -621,8 +624,11 @@ static int igt_wait_reset(void *arg) __i915_add_request(rq, true); if (!wait_for_hang(&h, rq)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + pr_err("Failed to start request %x, at %x\n", rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(rq->engine, &p); i915_reset(i915, 0); i915_gem_set_wedged(i915); @@ -713,8 +719,12 @@ static int igt_reset_queue(void *arg) __i915_add_request(rq, true); if (!wait_for_hang(&h, prev)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + pr_err("Failed to start request %x, at %x\n", prev->fence.seqno, hws_seqno(&h, prev)); + intel_engine_dump(rq->engine, &p); + i915_gem_request_put(rq); i915_gem_request_put(prev); @@ -765,7 +775,7 @@ static int igt_reset_queue(void *arg) pr_info("%s: Completed %d resets\n", engine->name, count); *h.batch = MI_BATCH_BUFFER_END; - wmb(); + i915_gem_chipset_flush(i915); i915_gem_request_put(prev); } @@ -815,8 +825,11 @@ static int igt_handle_error(void *arg) __i915_add_request(rq, true); if (!wait_for_hang(&h, rq)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + pr_err("Failed to start request %x, at %x\n", rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(rq->engine, &p); i915_reset(i915, 0); i915_gem_set_wedged(i915); @@ -865,9 +878,16 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_reset_queue), SUBTEST(igt_handle_error), }; + int err; if (!intel_has_gpu_reset(i915)) return 0; - return i915_subtests(tests, i915); + intel_runtime_pm_get(i915); + + err = i915_subtests(tests, i915); + + intel_runtime_pm_put(i915); + + return err; } diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 2388424a14da..04eb9362f4f8 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -83,6 +83,8 @@ static void mock_device_release(struct drm_device *dev) kmem_cache_destroy(i915->vmas); kmem_cache_destroy(i915->objects); + i915_gemfs_fini(i915); + drm_dev_fini(&i915->drm); put_device(&i915->drm.pdev->dev); } @@ -146,7 +148,7 @@ struct drm_i915_private *mock_gem_device(void) dev_set_name(&pdev->dev, "mock"); dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); -#if IS_ENABLED(CONFIG_IOMMU_API) +#if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU) /* hack to disable iommu for the fake device; force identity mapping */ pdev->dev.archdata.iommu = (void *)-1; #endif @@ -172,6 +174,11 @@ struct drm_i915_private *mock_gem_device(void) mkwrite_device_info(i915)->gen = -1; + mkwrite_device_info(i915)->page_sizes = + I915_GTT_PAGE_SIZE_4K | + I915_GTT_PAGE_SIZE_64K | + I915_GTT_PAGE_SIZE_2M; + spin_lock_init(&i915->mm.object_stat_lock); mock_uncore_init(i915); @@ -239,8 +246,16 @@ struct drm_i915_private *mock_gem_device(void) if (!i915->kernel_context) goto err_engine; + i915->preempt_context = mock_context(i915, NULL); + if (!i915->preempt_context) + goto err_kernel_context; + + WARN_ON(i915_gemfs_init(i915)); + return i915; +err_kernel_context: + i915_gem_context_put(i915->kernel_context); err_engine: for_each_engine(engine, i915, id) mock_engine_free(engine); diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index f2118cf535a0..336e1afb250f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -43,7 +43,6 @@ static int mock_bind_ppgtt(struct i915_vma *vma, u32 flags) { GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND); - vma->pages = vma->obj->mm.pages; vma->flags |= I915_VMA_LOCAL_BIND; return 0; } @@ -84,6 +83,8 @@ mock_ppgtt(struct drm_i915_private *i915, ppgtt->base.insert_entries = mock_insert_entries; ppgtt->base.bind_vma = mock_bind_ppgtt; ppgtt->base.unbind_vma = mock_unbind_ppgtt; + ppgtt->base.set_pages = ppgtt_set_pages; + ppgtt->base.clear_pages = clear_pages; ppgtt->base.cleanup = mock_cleanup; return ppgtt; @@ -93,12 +94,6 @@ static int mock_bind_ggtt(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) { - int err; - - err = i915_get_ggtt_vma_pages(vma); - if (err) - return err; - vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; return 0; } @@ -124,6 +119,8 @@ void mock_init_ggtt(struct drm_i915_private *i915) ggtt->base.insert_entries = mock_insert_entries; ggtt->base.bind_vma = mock_bind_ggtt; ggtt->base.unbind_vma = mock_unbind_ggtt; + ggtt->base.set_pages = ggtt_set_pages; + ggtt->base.clear_pages = clear_pages; ggtt->base.cleanup = mock_cleanup; i915_address_space_init(&ggtt->base, i915, "global"); diff --git a/drivers/gpu/drm/i915/selftests/scatterlist.c b/drivers/gpu/drm/i915/selftests/scatterlist.c index 1cc5d2931753..cd6d2a16071f 100644 --- a/drivers/gpu/drm/i915/selftests/scatterlist.c +++ b/drivers/gpu/drm/i915/selftests/scatterlist.c @@ -189,6 +189,20 @@ static unsigned int random(unsigned long n, return 1 + (prandom_u32_state(rnd) % 1024); } +static unsigned int random_page_size_pages(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + /* 4K, 64K, 2M */ + static unsigned int page_count[] = { + BIT(12) >> PAGE_SHIFT, + BIT(16) >> PAGE_SHIFT, + BIT(21) >> PAGE_SHIFT, + }; + + return page_count[(prandom_u32_state(rnd) % 3)]; +} + static inline bool page_contiguous(struct page *first, struct page *last, unsigned long npages) @@ -252,6 +266,7 @@ static const npages_fn_t npages_funcs[] = { grow, shrink, random, + random_page_size_pages, NULL, }; diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index b6c3540e07bc..0937d9a7d8fb 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -53,6 +53,8 @@ extern struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); extern struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags); +extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, + const char *name, loff_t size, unsigned long flags); extern int shmem_zero_setup(struct vm_area_struct *); extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index fe25a01c81f2..125bde7d9504 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -397,10 +397,20 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_MIN_EU_IN_POOL 39 #define I915_PARAM_MMAP_GTT_VERSION 40 -/* Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution +/* + * Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution * priorities and the driver will attempt to execute batches in priority order. + * The param returns a capability bitmask, nonzero implies that the scheduler + * is enabled, with different features present according to the mask. + * + * The initial priority for each batch is supplied by the context and is + * controlled via I915_CONTEXT_PARAM_PRIORITY. */ #define I915_PARAM_HAS_SCHEDULER 41 +#define I915_SCHEDULER_CAP_ENABLED (1ul << 0) +#define I915_SCHEDULER_CAP_PRIORITY (1ul << 1) +#define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) + #define I915_PARAM_HUC_STATUS 42 /* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of @@ -1308,7 +1318,7 @@ struct drm_i915_reg_read { * be specified */ __u64 offset; -#define I915_REG_READ_8B_WA BIT(0) +#define I915_REG_READ_8B_WA (1ul << 0) __u64 val; /* Return value */ }; @@ -1360,6 +1370,10 @@ struct drm_i915_gem_context_param { #define I915_CONTEXT_PARAM_GTT_SIZE 0x3 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 #define I915_CONTEXT_PARAM_BANNABLE 0x5 +#define I915_CONTEXT_PARAM_PRIORITY 0x6 +#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */ +#define I915_CONTEXT_DEFAULT_PRIORITY 0 +#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */ __u64 value; }; diff --git a/mm/shmem.c b/mm/shmem.c index 07a1d22807be..3229d27503ec 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -4183,7 +4183,7 @@ static const struct dentry_operations anon_ops = { .d_dname = simple_dname }; -static struct file *__shmem_file_setup(const char *name, loff_t size, +static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, loff_t size, unsigned long flags, unsigned int i_flags) { struct file *res; @@ -4192,8 +4192,8 @@ static struct file *__shmem_file_setup(const char *name, loff_t size, struct super_block *sb; struct qstr this; - if (IS_ERR(shm_mnt)) - return ERR_CAST(shm_mnt); + if (IS_ERR(mnt)) + return ERR_CAST(mnt); if (size < 0 || size > MAX_LFS_FILESIZE) return ERR_PTR(-EINVAL); @@ -4205,8 +4205,8 @@ static struct file *__shmem_file_setup(const char *name, loff_t size, this.name = name; this.len = strlen(name); this.hash = 0; /* will go */ - sb = shm_mnt->mnt_sb; - path.mnt = mntget(shm_mnt); + sb = mnt->mnt_sb; + path.mnt = mntget(mnt); path.dentry = d_alloc_pseudo(sb, &this); if (!path.dentry) goto put_memory; @@ -4251,7 +4251,7 @@ put_path: */ struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags) { - return __shmem_file_setup(name, size, flags, S_PRIVATE); + return __shmem_file_setup(shm_mnt, name, size, flags, S_PRIVATE); } /** @@ -4262,10 +4262,24 @@ struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned lon */ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags) { - return __shmem_file_setup(name, size, flags, 0); + return __shmem_file_setup(shm_mnt, name, size, flags, 0); } EXPORT_SYMBOL_GPL(shmem_file_setup); +/** + * shmem_file_setup_with_mnt - get an unlinked file living in tmpfs + * @mnt: the tmpfs mount where the file will be created + * @name: name for dentry (to be seen in /proc//maps + * @size: size to be set for the file + * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size + */ +struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, const char *name, + loff_t size, unsigned long flags) +{ + return __shmem_file_setup(mnt, name, size, flags, 0); +} +EXPORT_SYMBOL_GPL(shmem_file_setup_with_mnt); + /** * shmem_zero_setup - setup a shared anonymous mapping * @vma: the vma to be mmapped is prepared by do_mmap_pgoff @@ -4281,7 +4295,7 @@ int shmem_zero_setup(struct vm_area_struct *vma) * accessible to the user through its mapping, use S_PRIVATE flag to * bypass file security, in the same way as shmem_kernel_file_setup(). */ - file = __shmem_file_setup("dev/zero", size, vma->vm_flags, S_PRIVATE); + file = shmem_kernel_file_setup("dev/zero", size, vma->vm_flags); if (IS_ERR(file)) return PTR_ERR(file);