diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 39573c5f7518..955555d6ec88 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -168,6 +168,17 @@ config DRM_I915_KMS the driver to bind to PCI devices, which precludes loading things like intelfb. +config DRM_I915_PRELIMINARY_HW_SUPPORT + bool "Enable preliminary support for prerelease Intel hardware by default" + depends on DRM_I915 + help + Choose this option if you have prerelease Intel hardware and want the + i915 driver to support it by default. You can enable such support at + runtime with the module option i915.preliminary_hw_support=1; this + option changes the default for that module option. + + If in doubt, say "N". + config DRM_MGA tristate "Matrox g200/g400" depends on DRM && PCI diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index aded1e11e8ff..af93cc55259f 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -254,6 +254,9 @@ void drm_mm_remove_node(struct drm_mm_node *node) struct drm_mm *mm = node->mm; struct drm_mm_node *prev_node; + if (WARN_ON(!node->allocated)) + return; + BUG_ON(node->scanned_block || node->scanned_prev_free || node->scanned_next_free); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1a87cc9fd899..55ab9246e1b9 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "intel_drv.h" #include "intel_ringbuffer.h" @@ -99,7 +100,7 @@ static void describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) { struct i915_vma *vma; - seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %d %d %d%s%s%s", + seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %u %u %u%s%s%s", &obj->base, get_pin_flag(obj), get_tiling_flag(obj), @@ -117,6 +118,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_printf(m, " (name: %d)", obj->base.name); if (obj->pin_count) seq_printf(m, " (pinned x %d)", obj->pin_count); + if (obj->pin_display) + seq_printf(m, " (display)"); if (obj->fence_reg != I915_FENCE_REG_NONE) seq_printf(m, " (fence: %d)", obj->fence_reg); list_for_each_entry(vma, &obj->vma_list, vma_link) { @@ -193,9 +196,9 @@ static int obj_rank_by_stolen(void *priv, struct list_head *A, struct list_head *B) { struct drm_i915_gem_object *a = - container_of(A, struct drm_i915_gem_object, exec_list); + container_of(A, struct drm_i915_gem_object, obj_exec_link); struct drm_i915_gem_object *b = - container_of(B, struct drm_i915_gem_object, exec_list); + container_of(B, struct drm_i915_gem_object, obj_exec_link); return a->stolen->start - b->stolen->start; } @@ -219,7 +222,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) if (obj->stolen == NULL) continue; - list_add(&obj->exec_list, &stolen); + list_add(&obj->obj_exec_link, &stolen); total_obj_size += obj->base.size; total_gtt_size += i915_gem_obj_ggtt_size(obj); @@ -229,7 +232,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) if (obj->stolen == NULL) continue; - list_add(&obj->exec_list, &stolen); + list_add(&obj->obj_exec_link, &stolen); total_obj_size += obj->base.size; count++; @@ -237,11 +240,11 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) list_sort(NULL, &stolen, obj_rank_by_stolen); seq_puts(m, "Stolen:\n"); while (!list_empty(&stolen)) { - obj = list_first_entry(&stolen, typeof(*obj), exec_list); + obj = list_first_entry(&stolen, typeof(*obj), obj_exec_link); seq_puts(m, " "); describe_obj(m, obj); seq_putc(m, '\n'); - list_del_init(&obj->exec_list); + list_del_init(&obj->obj_exec_link); } mutex_unlock(&dev->struct_mutex); @@ -1767,6 +1770,52 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) return 0; } +static int i915_energy_uJ(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m->private; + struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + u64 power; + u32 units; + + if (INTEL_INFO(dev)->gen < 6) + return -ENODEV; + + rdmsrl(MSR_RAPL_POWER_UNIT, power); + power = (power & 0x1f00) >> 8; + units = 1000000 / (1 << power); /* convert to uJ */ + power = I915_READ(MCH_SECP_NRG_STTS); + power *= units; + + seq_printf(m, "%llu", (long long unsigned)power); + + return 0; +} + +static int i915_pc8_status(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + if (!IS_HASWELL(dev)) { + seq_puts(m, "not supported\n"); + return 0; + } + + mutex_lock(&dev_priv->pc8.lock); + seq_printf(m, "Requirements met: %s\n", + yesno(dev_priv->pc8.requirements_met)); + seq_printf(m, "GPU idle: %s\n", yesno(dev_priv->pc8.gpu_idle)); + seq_printf(m, "Disable count: %d\n", dev_priv->pc8.disable_count); + seq_printf(m, "IRQs disabled: %s\n", + yesno(dev_priv->pc8.irqs_disabled)); + seq_printf(m, "Enabled: %s\n", yesno(dev_priv->pc8.enabled)); + mutex_unlock(&dev_priv->pc8.lock); + + return 0; +} + static int i915_wedged_get(void *data, u64 *val) { @@ -2206,6 +2255,8 @@ static struct drm_info_list i915_debugfs_list[] = { {"i915_dpio", i915_dpio_info, 0}, {"i915_llc", i915_llc, 0}, {"i915_edp_psr_status", i915_edp_psr_status, 0}, + {"i915_energy_uJ", i915_energy_uJ, 0}, + {"i915_pc8_status", i915_pc8_status, 0}, }; #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 54f86242e80e..3e4e6073d171 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -976,6 +976,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_LLC: value = HAS_LLC(dev); break; + case I915_PARAM_HAS_WT: + value = HAS_WT(dev); + break; case I915_PARAM_HAS_ALIASING_PPGTT: value = dev_priv->mm.aliasing_ppgtt ? 1 : 0; break; @@ -1483,8 +1486,24 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) mutex_init(&dev_priv->rps.hw_lock); mutex_init(&dev_priv->modeset_restore_lock); + mutex_init(&dev_priv->pc8.lock); + dev_priv->pc8.requirements_met = false; + dev_priv->pc8.gpu_idle = false; + dev_priv->pc8.irqs_disabled = false; + dev_priv->pc8.enabled = false; + dev_priv->pc8.disable_count = 2; /* requirements_met + gpu_idle */ + INIT_DELAYED_WORK(&dev_priv->pc8.enable_work, hsw_enable_pc8_work); + i915_dump_device_info(dev_priv); + /* Not all pre-production machines fall into this category, only the + * very first ones. Almost everything should work, except for maybe + * suspend/resume. And we don't implement workarounds that affect only + * pre-production machines. */ + if (IS_HSW_EARLY_SDV(dev)) + DRM_INFO("This is an early pre-production Haswell machine. " + "It may not be fully functional.\n"); + if (i915_get_bridge_dev(dev)) { ret = -EIO; goto free_priv; @@ -1677,8 +1696,13 @@ int i915_driver_unload(struct drm_device *dev) intel_gpu_ips_teardown(); - if (HAS_POWER_WELL(dev)) + if (HAS_POWER_WELL(dev)) { + /* The i915.ko module is still not prepared to be loaded when + * the power well is not enabled, so just enable it in case + * we're going to unload/reload. */ + intel_set_power_well(dev, true); i915_remove_power_well(dev); + } i915_teardown_sysfs(dev); @@ -1724,6 +1748,8 @@ int i915_driver_unload(struct drm_device *dev) cancel_work_sync(&dev_priv->gpu_error.work); i915_destroy_error_state(dev); + cancel_delayed_work_sync(&dev_priv->pc8.enable_work); + if (dev->pdev->msi_enabled) pci_disable_msi(dev->pdev); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index eec47bd00353..735dd5625e9e 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -122,10 +122,10 @@ int i915_enable_psr __read_mostly = 0; module_param_named(enable_psr, i915_enable_psr, int, 0600); MODULE_PARM_DESC(enable_psr, "Enable PSR (default: false)"); -unsigned int i915_preliminary_hw_support __read_mostly = 0; +unsigned int i915_preliminary_hw_support __read_mostly = IS_ENABLED(CONFIG_DRM_I915_PRELIMINARY_HW_SUPPORT); module_param_named(preliminary_hw_support, i915_preliminary_hw_support, int, 0600); MODULE_PARM_DESC(preliminary_hw_support, - "Enable preliminary hardware support. (default: false)"); + "Enable preliminary hardware support."); int i915_disable_power_well __read_mostly = 1; module_param_named(disable_power_well, i915_disable_power_well, int, 0600); @@ -141,6 +141,14 @@ module_param_named(fastboot, i915_fastboot, bool, 0600); MODULE_PARM_DESC(fastboot, "Try to skip unnecessary mode sets at boot time " "(default: false)"); +int i915_enable_pc8 __read_mostly = 1; +module_param_named(enable_pc8, i915_enable_pc8, int, 0600); +MODULE_PARM_DESC(enable_pc8, "Enable support for low power package C states (PC8+) (default: true)"); + +int i915_pc8_timeout __read_mostly = 5000; +module_param_named(pc8_timeout, i915_pc8_timeout, int, 0600); +MODULE_PARM_DESC(pc8_timeout, "Number of msecs of idleness required to enter PC8+ (default: 5000)"); + bool i915_prefault_disable __read_mostly; module_param_named(prefault_disable, i915_prefault_disable, bool, 0600); MODULE_PARM_DESC(prefault_disable, @@ -557,6 +565,9 @@ static int i915_drm_freeze(struct drm_device *dev) dev_priv->modeset_restore = MODESET_SUSPENDED; mutex_unlock(&dev_priv->modeset_restore_lock); + /* We do a lot of poking in a lot of registers, make sure they work + * properly. */ + hsw_disable_package_c8(dev_priv); intel_set_power_well(dev, true); drm_kms_helper_poll_disable(dev); @@ -713,6 +724,10 @@ static int __i915_drm_thaw(struct drm_device *dev) schedule_work(&dev_priv->console_resume_work); } + /* Undo what we did at i915_drm_freeze so the refcount goes back to the + * expected level. */ + hsw_enable_package_c8(dev_priv); + mutex_lock(&dev_priv->modeset_restore_lock); dev_priv->modeset_restore = MODESET_DONE; mutex_unlock(&dev_priv->modeset_restore_lock); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d54354421538..f22c81d040c0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -455,6 +455,7 @@ enum i915_cache_level { caches, eg sampler/render caches, and the large Last-Level-Cache. LLC is coherent with the CPU, but L3 is only visible to the GPU. */ + I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */ }; typedef uint32_t gen6_gtt_pte_t; @@ -563,6 +564,10 @@ struct i915_vma { struct list_head mm_list; struct list_head vma_link; /* Link in the object's VMA list */ + + /** This vma's place in the batchbuffer or on the eviction list */ + struct list_head exec_list; + }; struct i915_ctx_hang_stats { @@ -1072,6 +1077,75 @@ struct intel_wm_level { uint32_t fbc_val; }; +/* + * This struct tracks the state needed for the Package C8+ feature. + * + * Package states C8 and deeper are really deep PC states that can only be + * reached when all the devices on the system allow it, so even if the graphics + * device allows PC8+, it doesn't mean the system will actually get to these + * states. + * + * Our driver only allows PC8+ when all the outputs are disabled, the power well + * is disabled and the GPU is idle. When these conditions are met, we manually + * do the other conditions: disable the interrupts, clocks and switch LCPLL + * refclk to Fclk. + * + * When we really reach PC8 or deeper states (not just when we allow it) we lose + * the state of some registers, so when we come back from PC8+ we need to + * restore this state. We don't get into PC8+ if we're not in RC6, so we don't + * need to take care of the registers kept by RC6. + * + * The interrupt disabling is part of the requirements. We can only leave the + * PCH HPD interrupts enabled. If we're in PC8+ and we get another interrupt we + * can lock the machine. + * + * Ideally every piece of our code that needs PC8+ disabled would call + * hsw_disable_package_c8, which would increment disable_count and prevent the + * system from reaching PC8+. But we don't have a symmetric way to do this for + * everything, so we have the requirements_met and gpu_idle variables. When we + * switch requirements_met or gpu_idle to true we decrease disable_count, and + * increase it in the opposite case. The requirements_met variable is true when + * all the CRTCs, encoders and the power well are disabled. The gpu_idle + * variable is true when the GPU is idle. + * + * In addition to everything, we only actually enable PC8+ if disable_count + * stays at zero for at least some seconds. This is implemented with the + * enable_work variable. We do this so we don't enable/disable PC8 dozens of + * consecutive times when all screens are disabled and some background app + * queries the state of our connectors, or we have some application constantly + * waking up to use the GPU. Only after the enable_work function actually + * enables PC8+ the "enable" variable will become true, which means that it can + * be false even if disable_count is 0. + * + * The irqs_disabled variable becomes true exactly after we disable the IRQs and + * goes back to false exactly before we reenable the IRQs. We use this variable + * to check if someone is trying to enable/disable IRQs while they're supposed + * to be disabled. This shouldn't happen and we'll print some error messages in + * case it happens, but if it actually happens we'll also update the variables + * inside struct regsave so when we restore the IRQs they will contain the + * latest expected values. + * + * For more, read "Display Sequences for Package C8" on our documentation. + */ +struct i915_package_c8 { + bool requirements_met; + bool gpu_idle; + bool irqs_disabled; + /* Only true after the delayed work task actually enables it. */ + bool enabled; + int disable_count; + struct mutex lock; + struct delayed_work enable_work; + + struct { + uint32_t deimr; + uint32_t sdeimr; + uint32_t gtimr; + uint32_t gtier; + uint32_t gen6_pmimr; + } regsave; +}; + typedef struct drm_i915_private { struct drm_device *dev; struct kmem_cache *slab; @@ -1119,6 +1193,7 @@ typedef struct drm_i915_private { /** Cached value of IMR to avoid reads in updating the bitfield */ u32 irq_mask; u32 gt_irq_mask; + u32 pm_irq_mask; struct work_struct hotplug_work; bool enable_hotplug_processing; @@ -1255,6 +1330,8 @@ typedef struct drm_i915_private { uint16_t cur_latency[5]; } wm; + struct i915_package_c8 pc8; + /* Old dri1 support infrastructure, beware the dragons ya fools entering * here! */ struct i915_dri1_state dri1; @@ -1312,6 +1389,8 @@ struct drm_i915_gem_object { struct list_head global_list; struct list_head ring_list; + /** Used in execbuf to temporarily hold a ref */ + struct list_head obj_exec_link; /** This object's place in the batchbuffer or on the eviction list */ struct list_head exec_list; @@ -1378,6 +1457,7 @@ struct drm_i915_gem_object { */ unsigned int fault_mappable:1; unsigned int pin_mappable:1; + unsigned int pin_display:1; /* * Is the GPU currently using a fence to access this buffer, @@ -1385,7 +1465,7 @@ struct drm_i915_gem_object { unsigned int pending_fenced_gpu_access:1; unsigned int fenced_gpu_access:1; - unsigned int cache_level:2; + unsigned int cache_level:3; unsigned int has_aliasing_ppgtt_mapping:1; unsigned int has_global_gtt_mapping:1; @@ -1498,7 +1578,6 @@ struct drm_i915_file_private { #define IS_PINEVIEW_M(dev) ((dev)->pci_device == 0xa011) #define IS_PINEVIEW(dev) (INTEL_INFO(dev)->is_pineview) #define IS_G33(dev) (INTEL_INFO(dev)->is_g33) -#define IS_IRONLAKE_D(dev) ((dev)->pci_device == 0x0042) #define IS_IRONLAKE_M(dev) ((dev)->pci_device == 0x0046) #define IS_IVYBRIDGE(dev) (INTEL_INFO(dev)->is_ivybridge) #define IS_IVB_GT1(dev) ((dev)->pci_device == 0x0156 || \ @@ -1510,6 +1589,8 @@ struct drm_i915_file_private { #define IS_VALLEYVIEW(dev) (INTEL_INFO(dev)->is_valleyview) #define IS_HASWELL(dev) (INTEL_INFO(dev)->is_haswell) #define IS_MOBILE(dev) (INTEL_INFO(dev)->is_mobile) +#define IS_HSW_EARLY_SDV(dev) (IS_HASWELL(dev) && \ + ((dev)->pci_device & 0xFF00) == 0x0C00) #define IS_ULT(dev) (IS_HASWELL(dev) && \ ((dev)->pci_device & 0xFF00) == 0x0A00) @@ -1530,6 +1611,7 @@ struct drm_i915_file_private { #define HAS_BLT(dev) (INTEL_INFO(dev)->has_blt_ring) #define HAS_VEBOX(dev) (INTEL_INFO(dev)->has_vebox_ring) #define HAS_LLC(dev) (INTEL_INFO(dev)->has_llc) +#define HAS_WT(dev) (IS_HASWELL(dev) && to_i915(dev)->ellc_size) #define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws) #define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 6) @@ -1552,8 +1634,6 @@ struct drm_i915_file_private { #define SUPPORTS_EDP(dev) (IS_IRONLAKE_M(dev)) #define SUPPORTS_TV(dev) (INTEL_INFO(dev)->supports_tv) #define I915_HAS_HOTPLUG(dev) (INTEL_INFO(dev)->has_hotplug) -/* dsparb controlled by hw only */ -#define DSPARB_HWCONTROL(dev) (IS_G4X(dev) || IS_IRONLAKE(dev)) #define HAS_FW_BLC(dev) (INTEL_INFO(dev)->gen > 2) #define HAS_PIPE_CXSR(dev) (INTEL_INFO(dev)->has_pipe_cxsr) @@ -1561,8 +1641,6 @@ struct drm_i915_file_private { #define HAS_IPS(dev) (IS_ULT(dev)) -#define HAS_PIPE_CONTROL(dev) (INTEL_INFO(dev)->gen >= 5) - #define HAS_DDI(dev) (INTEL_INFO(dev)->has_ddi) #define HAS_POWER_WELL(dev) (IS_HASWELL(dev)) #define HAS_FPGA_DBG_UNCLAIMED(dev) (INTEL_INFO(dev)->has_fpga_dbg) @@ -1629,6 +1707,8 @@ extern unsigned int i915_preliminary_hw_support __read_mostly; extern int i915_disable_power_well __read_mostly; extern int i915_enable_ips __read_mostly; extern bool i915_fastboot __read_mostly; +extern int i915_enable_pc8 __read_mostly; +extern int i915_pc8_timeout __read_mostly; extern bool i915_prefault_disable __read_mostly; extern int i915_suspend(struct drm_device *dev, pm_message_t state); @@ -1839,7 +1919,7 @@ static inline bool i915_terminally_wedged(struct i915_gpu_error *error) } void i915_gem_reset(struct drm_device *dev); -void i915_gem_clflush_object(struct drm_i915_gem_object *obj); +bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); int __must_check i915_gem_init(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); @@ -1866,6 +1946,7 @@ int __must_check i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 alignment, struct intel_ring_buffer *pipelined); +void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj); int i915_gem_attach_phys_object(struct drm_device *dev, struct drm_i915_gem_object *obj, int id, @@ -1901,6 +1982,9 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, struct i915_address_space *vm); struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, struct i915_address_space *vm); +struct i915_vma * +i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, + struct i915_address_space *vm); /* Some GGTT VM helpers */ #define obj_to_ggtt(obj) \ (&((struct drm_i915_private *)(obj)->base.dev->dev_private)->gtt.base) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 498ef8a7bbc7..2d1cb10d846f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -38,7 +38,8 @@ #include static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); -static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); +static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, + bool force); static __must_check int i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct i915_address_space *vm, @@ -62,6 +63,20 @@ static long i915_gem_purge(struct drm_i915_private *dev_priv, long target); static void i915_gem_shrink_all(struct drm_i915_private *dev_priv); static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); +static bool cpu_cache_is_coherent(struct drm_device *dev, + enum i915_cache_level level) +{ + return HAS_LLC(dev) || level != I915_CACHE_NONE; +} + +static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) +{ + if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + return true; + + return obj->pin_display; +} + static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) { if (obj->tiling_mode) @@ -414,8 +429,7 @@ i915_gem_shmem_pread(struct drm_device *dev, * read domain and manually flush cachelines (if required). This * optimizes for the case when the gpu will dirty the data * anyway again before the next pread happens. */ - if (obj->cache_level == I915_CACHE_NONE) - needs_clflush = 1; + needs_clflush = !cpu_cache_is_coherent(dev, obj->cache_level); if (i915_gem_obj_bound_any(obj)) { ret = i915_gem_object_set_to_gtt_domain(obj, false); if (ret) @@ -731,19 +745,18 @@ i915_gem_shmem_pwrite(struct drm_device *dev, * write domain and manually flush cachelines (if required). This * optimizes for the case when the gpu will use the data * right away and we therefore have to clflush anyway. */ - if (obj->cache_level == I915_CACHE_NONE) - needs_clflush_after = 1; + needs_clflush_after = cpu_write_needs_clflush(obj); if (i915_gem_obj_bound_any(obj)) { ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) return ret; } } - /* Same trick applies for invalidate partially written cachelines before - * writing. */ - if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU) - && obj->cache_level == I915_CACHE_NONE) - needs_clflush_before = 1; + /* Same trick applies to invalidate partially written cachelines read + * before writing. */ + if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) + needs_clflush_before = + !cpu_cache_is_coherent(dev, obj->cache_level); ret = i915_gem_object_get_pages(obj); if (ret) @@ -822,8 +835,8 @@ out: */ if (!needs_clflush_after && obj->base.write_domain != I915_GEM_DOMAIN_CPU) { - i915_gem_clflush_object(obj); - i915_gem_chipset_flush(dev); + if (i915_gem_clflush_object(obj, obj->pin_display)) + i915_gem_chipset_flush(dev); } } @@ -900,9 +913,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, goto out; } - if (obj->cache_level == I915_CACHE_NONE && - obj->tiling_mode == I915_TILING_NONE && - obj->base.write_domain != I915_GEM_DOMAIN_CPU) { + if (obj->tiling_mode == I915_TILING_NONE && + obj->base.write_domain != I915_GEM_DOMAIN_CPU && + cpu_write_needs_clflush(obj)) { ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); /* Note that the gtt paths might fail with non-page-backed user * pointers (e.g. gtt mappings when moving data between @@ -986,6 +999,8 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, bool wait_forever = true; int ret; + WARN(dev_priv->pc8.irqs_disabled, "IRQs disabled\n"); + if (i915_seqno_passed(ring->get_seqno(ring, true), seqno)) return 0; @@ -1251,8 +1266,8 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, } /* Pinned buffers may be scanout, so flush the cache */ - if (obj->pin_count) - i915_gem_object_flush_cpu_write_domain(obj); + if (obj->pin_display) + i915_gem_object_flush_cpu_write_domain(obj, true); drm_gem_object_unreference(&obj->base); unlock: @@ -1622,7 +1637,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) * hope for the best. */ WARN_ON(ret != -EIO); - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, true); obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; } @@ -2188,7 +2203,7 @@ static void i915_set_reset_status(struct intel_ring_buffer *ring, offset = i915_gem_obj_offset(request->batch_obj, request_to_vm(request)); - if (ring->hangcheck.action != wait && + if (ring->hangcheck.action != HANGCHECK_WAIT && i915_request_guilty(request, acthd, &inside)) { DRM_ERROR("%s hung %s bo (0x%lx ctx %d) at 0x%x\n", ring->name, @@ -2593,6 +2608,9 @@ int i915_vma_unbind(struct i915_vma *vma) if (list_empty(&vma->vma_link)) return 0; + if (!drm_mm_node_allocated(&vma->node)) + goto destroy; + if (obj->pin_count) return -EBUSY; @@ -2630,6 +2648,8 @@ int i915_vma_unbind(struct i915_vma *vma) obj->map_and_fenceable = true; drm_mm_remove_node(&vma->node); + +destroy: i915_gem_vma_destroy(vma); /* Since the unbound list is global, only move to that list if @@ -3088,15 +3108,11 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct drm_device *dev = obj->base.dev; drm_i915_private_t *dev_priv = dev->dev_private; u32 size, fence_size, fence_alignment, unfenced_alignment; - bool mappable, fenceable; size_t gtt_max = map_and_fenceable ? dev_priv->gtt.mappable_end : vm->total; struct i915_vma *vma; int ret; - if (WARN_ON(!list_empty(&obj->vma_list))) - return -EBUSY; - fence_size = i915_gem_get_gtt_size(dev, obj->base.size, obj->tiling_mode); @@ -3135,16 +3151,17 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, i915_gem_object_pin_pages(obj); - /* FIXME: For now we only ever use 1 VMA per object */ BUG_ON(!i915_is_ggtt(vm)); - WARN_ON(!list_empty(&obj->vma_list)); - vma = i915_gem_vma_create(obj, vm); + vma = i915_gem_obj_lookup_or_create_vma(obj, vm); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err_unpin; } + /* For now we only ever use 1 vma per object */ + WARN_ON(!list_is_singular(&obj->vma_list)); + search_free: ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, size, alignment, @@ -3173,18 +3190,19 @@ search_free: list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); list_add_tail(&vma->mm_list, &vm->inactive_list); - fenceable = - i915_is_ggtt(vm) && - i915_gem_obj_ggtt_size(obj) == fence_size && - (i915_gem_obj_ggtt_offset(obj) & (fence_alignment - 1)) == 0; + if (i915_is_ggtt(vm)) { + bool mappable, fenceable; - mappable = - i915_is_ggtt(vm) && - vma->node.start + obj->base.size <= dev_priv->gtt.mappable_end; + fenceable = (vma->node.size == fence_size && + (vma->node.start & (fence_alignment - 1)) == 0); + + mappable = (vma->node.start + obj->base.size <= + dev_priv->gtt.mappable_end); - /* Map and fenceable only changes if the VM is the global GGTT */ - if (i915_is_ggtt(vm)) obj->map_and_fenceable = mappable && fenceable; + } + + WARN_ON(map_and_fenceable && !obj->map_and_fenceable); trace_i915_vma_bind(vma, map_and_fenceable); i915_gem_verify_gtt(dev); @@ -3199,22 +3217,23 @@ err_unpin: return ret; } -void -i915_gem_clflush_object(struct drm_i915_gem_object *obj) +bool +i915_gem_clflush_object(struct drm_i915_gem_object *obj, + bool force) { /* If we don't have a page list set up, then we're not pinned * to GPU, and we can ignore the cache flush because it'll happen * again at bind time. */ if (obj->pages == NULL) - return; + return false; /* * Stolen memory is always coherent with the GPU as it is explicitly * marked as wc by the system, or the system is cache-coherent. */ if (obj->stolen) - return; + return false; /* If the GPU is snooping the contents of the CPU cache, * we do not need to manually clear the CPU cache lines. However, @@ -3224,12 +3243,13 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj) * snooping behaviour occurs naturally as the result of our domain * tracking. */ - if (obj->cache_level != I915_CACHE_NONE) - return; + if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + return false; trace_i915_gem_object_clflush(obj); - drm_clflush_sg(obj->pages); + + return true; } /** Flushes the GTT write domain for the object if it's dirty. */ @@ -3261,15 +3281,17 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) /** Flushes the CPU write domain for the object if it's dirty. */ static void -i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) +i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, + bool force) { uint32_t old_write_domain; if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) return; - i915_gem_clflush_object(obj); - i915_gem_chipset_flush(obj->base.dev); + if (i915_gem_clflush_object(obj, force)) + i915_gem_chipset_flush(obj->base.dev); + old_write_domain = obj->base.write_domain; obj->base.write_domain = 0; @@ -3302,7 +3324,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - i915_gem_object_flush_cpu_write_domain(obj); + i915_gem_object_flush_cpu_write_domain(obj, false); /* Serialise direct access to this object with the barriers for * coherent writes from the GPU, by effectively invalidating the @@ -3392,7 +3414,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, obj, cache_level); } - if (cache_level == I915_CACHE_NONE) { + list_for_each_entry(vma, &obj->vma_list, vma_link) + vma->node.color = cache_level; + obj->cache_level = cache_level; + + if (cpu_write_needs_clflush(obj)) { u32 old_read_domains, old_write_domain; /* If we're coming from LLC cached, then we haven't @@ -3402,7 +3428,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * Just set it to the CPU cache for now. */ WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); - WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU); old_read_domains = obj->base.read_domains; old_write_domain = obj->base.write_domain; @@ -3415,9 +3440,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, old_write_domain); } - list_for_each_entry(vma, &obj->vma_list, vma_link) - vma->node.color = cache_level; - obj->cache_level = cache_level; i915_gem_verify_gtt(dev); return 0; } @@ -3439,7 +3461,20 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, goto unlock; } - args->caching = obj->cache_level != I915_CACHE_NONE; + switch (obj->cache_level) { + case I915_CACHE_LLC: + case I915_CACHE_L3_LLC: + args->caching = I915_CACHING_CACHED; + break; + + case I915_CACHE_WT: + args->caching = I915_CACHING_DISPLAY; + break; + + default: + args->caching = I915_CACHING_NONE; + break; + } drm_gem_object_unreference(&obj->base); unlock: @@ -3462,6 +3497,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, case I915_CACHING_CACHED: level = I915_CACHE_LLC; break; + case I915_CACHING_DISPLAY: + level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; + break; default: return -EINVAL; } @@ -3484,6 +3522,22 @@ unlock: return ret; } +static bool is_pin_display(struct drm_i915_gem_object *obj) +{ + /* There are 3 sources that pin objects: + * 1. The display engine (scanouts, sprites, cursors); + * 2. Reservations for execbuffer; + * 3. The user. + * + * We can ignore reservations as we hold the struct_mutex and + * are only called outside of the reservation path. The user + * can only increment pin_count once, and so if after + * subtracting the potential reference by the user, any pin_count + * remains, it must be due to another use by the display engine. + */ + return obj->pin_count - !!obj->user_pin_count; +} + /* * Prepare buffer for display plane (scanout, cursors, etc). * Can be called from an uninterruptible phase (modesetting) and allows @@ -3503,6 +3557,11 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, return ret; } + /* Mark the pin_display early so that we account for the + * display coherency whilst setting up the cache domains. + */ + obj->pin_display = true; + /* The display engine is not coherent with the LLC cache on gen6. As * a result, we make sure that the pinning that is about to occur is * done with uncached PTEs. This is lowest common denominator for all @@ -3512,9 +3571,10 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, * of uncaching, which would allow us to flush all the LLC-cached data * with that bit in the PTE to main memory with just one PIPE_CONTROL. */ - ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); + ret = i915_gem_object_set_cache_level(obj, + HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); if (ret) - return ret; + goto err_unpin_display; /* As the user may map the buffer once pinned in the display plane * (e.g. libkms for the bootup splash), we have to ensure that we @@ -3522,9 +3582,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, */ ret = i915_gem_obj_ggtt_pin(obj, alignment, true, false); if (ret) - return ret; + goto err_unpin_display; - i915_gem_object_flush_cpu_write_domain(obj); + i915_gem_object_flush_cpu_write_domain(obj, true); old_write_domain = obj->base.write_domain; old_read_domains = obj->base.read_domains; @@ -3540,6 +3600,17 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, old_write_domain); return 0; + +err_unpin_display: + obj->pin_display = is_pin_display(obj); + return ret; +} + +void +i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj) +{ + i915_gem_object_unpin(obj); + obj->pin_display = is_pin_display(obj); } int @@ -3585,7 +3656,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) /* Flush the CPU cache if it's still invalid. */ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, false); obj->base.read_domains |= I915_GEM_DOMAIN_CPU; } @@ -3767,10 +3838,6 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data, obj->user_pin_count++; obj->pin_filp = file; - /* XXX - flush the CPU caches for pinned objects - * as the X server doesn't manage domains yet - */ - i915_gem_object_flush_cpu_write_domain(obj); args->offset = i915_gem_obj_ggtt_offset(obj); out: drm_gem_object_unreference(&obj->base); @@ -3913,6 +3980,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&obj->global_list); INIT_LIST_HEAD(&obj->ring_list); INIT_LIST_HEAD(&obj->exec_list); + INIT_LIST_HEAD(&obj->obj_exec_link); INIT_LIST_HEAD(&obj->vma_list); obj->ops = ops; @@ -4052,6 +4120,7 @@ struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&vma->vma_link); INIT_LIST_HEAD(&vma->mm_list); + INIT_LIST_HEAD(&vma->exec_list); vma->vm = vm; vma->obj = obj; @@ -4801,3 +4870,16 @@ struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, return NULL; } + +struct i915_vma * +i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, + struct i915_address_space *vm) +{ + struct i915_vma *vma; + + vma = i915_gem_obj_to_vma(obj, vm); + if (!vma) + vma = i915_gem_vma_create(obj, vm); + + return vma; +} diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 63ee1a9f7978..e918b05fcbdd 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -27,10 +27,15 @@ #include "i915_drv.h" #include +static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf) +{ + return to_intel_bo(buf->priv); +} + static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment, enum dma_data_direction dir) { - struct drm_i915_gem_object *obj = attachment->dmabuf->priv; + struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); struct sg_table *st; struct scatterlist *src, *dst; int ret, i; @@ -85,14 +90,22 @@ static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, struct sg_table *sg, enum dma_data_direction dir) { + struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); + + mutex_lock(&obj->base.dev->struct_mutex); + dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, dir); sg_free_table(sg); kfree(sg); + + i915_gem_object_unpin_pages(obj); + + mutex_unlock(&obj->base.dev->struct_mutex); } static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf) { - struct drm_i915_gem_object *obj = dma_buf->priv; + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); struct drm_device *dev = obj->base.dev; struct sg_page_iter sg_iter; struct page **pages; @@ -140,7 +153,7 @@ error: static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) { - struct drm_i915_gem_object *obj = dma_buf->priv; + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); struct drm_device *dev = obj->base.dev; int ret; @@ -183,7 +196,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct * static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size_t length, enum dma_data_direction direction) { - struct drm_i915_gem_object *obj = dma_buf->priv; + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); struct drm_device *dev = obj->base.dev; int ret; bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE); @@ -214,9 +227,7 @@ static const struct dma_buf_ops i915_dmabuf_ops = { struct dma_buf *i915_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gem_obj, int flags) { - struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); - - return dma_buf_export(obj, &i915_dmabuf_ops, obj->base.size, flags); + return dma_buf_export(gem_obj, &i915_dmabuf_ops, gem_obj->size, flags); } static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) @@ -253,7 +264,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, /* is this one of own objects? */ if (dma_buf->ops == &i915_dmabuf_ops) { - obj = dma_buf->priv; + obj = dma_buf_to_obj(dma_buf); /* is it from our device? */ if (obj->base.dev == dev) { /* diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 425939b7d343..91b700155850 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -37,7 +37,7 @@ mark_free(struct i915_vma *vma, struct list_head *unwind) if (vma->obj->pin_count) return false; - list_add(&vma->obj->exec_list, unwind); + list_add(&vma->exec_list, unwind); return drm_mm_scan_add_block(&vma->node); } @@ -49,7 +49,6 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm, drm_i915_private_t *dev_priv = dev->dev_private; struct list_head eviction_list, unwind_list; struct i915_vma *vma; - struct drm_i915_gem_object *obj; int ret = 0; trace_i915_gem_evict(dev, min_size, alignment, mappable); @@ -104,14 +103,13 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm, none: /* Nothing found, clean up and bail out! */ while (!list_empty(&unwind_list)) { - obj = list_first_entry(&unwind_list, - struct drm_i915_gem_object, + vma = list_first_entry(&unwind_list, + struct i915_vma, exec_list); - vma = i915_gem_obj_to_vma(obj, vm); ret = drm_mm_scan_remove_block(&vma->node); BUG_ON(ret); - list_del_init(&obj->exec_list); + list_del_init(&vma->exec_list); } /* We expect the caller to unpin, evict all and try again, or give up. @@ -125,28 +123,30 @@ found: * temporary list. */ INIT_LIST_HEAD(&eviction_list); while (!list_empty(&unwind_list)) { - obj = list_first_entry(&unwind_list, - struct drm_i915_gem_object, + vma = list_first_entry(&unwind_list, + struct i915_vma, exec_list); - vma = i915_gem_obj_to_vma(obj, vm); if (drm_mm_scan_remove_block(&vma->node)) { - list_move(&obj->exec_list, &eviction_list); - drm_gem_object_reference(&obj->base); + list_move(&vma->exec_list, &eviction_list); + drm_gem_object_reference(&vma->obj->base); continue; } - list_del_init(&obj->exec_list); + list_del_init(&vma->exec_list); } /* Unbinding will emit any required flushes */ while (!list_empty(&eviction_list)) { - obj = list_first_entry(&eviction_list, - struct drm_i915_gem_object, + struct drm_gem_object *obj; + vma = list_first_entry(&eviction_list, + struct i915_vma, exec_list); - if (ret == 0) - ret = i915_vma_unbind(i915_gem_obj_to_vma(obj, vm)); - list_del_init(&obj->exec_list); - drm_gem_object_unreference(&obj->base); + obj = &vma->obj->base; + list_del_init(&vma->exec_list); + if (ret == 0) + ret = i915_vma_unbind(vma); + + drm_gem_object_unreference(obj); } return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 8ccc29ac9629..792c52a235ee 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -171,6 +171,56 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) obj->cache_level != I915_CACHE_NONE); } +static int +relocate_entry_cpu(struct drm_i915_gem_object *obj, + struct drm_i915_gem_relocation_entry *reloc) +{ + uint32_t page_offset = offset_in_page(reloc->offset); + char *vaddr; + int ret = -EINVAL; + + ret = i915_gem_object_set_to_cpu_domain(obj, 1); + if (ret) + return ret; + + vaddr = kmap_atomic(i915_gem_object_get_page(obj, + reloc->offset >> PAGE_SHIFT)); + *(uint32_t *)(vaddr + page_offset) = reloc->delta; + kunmap_atomic(vaddr); + + return 0; +} + +static int +relocate_entry_gtt(struct drm_i915_gem_object *obj, + struct drm_i915_gem_relocation_entry *reloc) +{ + struct drm_device *dev = obj->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t __iomem *reloc_entry; + void __iomem *reloc_page; + int ret = -EINVAL; + + ret = i915_gem_object_set_to_gtt_domain(obj, true); + if (ret) + return ret; + + ret = i915_gem_object_put_fence(obj); + if (ret) + return ret; + + /* Map the page containing the relocation we're going to perform. */ + reloc->offset += i915_gem_obj_ggtt_offset(obj); + reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable, + reloc->offset & PAGE_MASK); + reloc_entry = (uint32_t __iomem *) + (reloc_page + offset_in_page(reloc->offset)); + iowrite32(reloc->delta, reloc_entry); + io_mapping_unmap_atomic(reloc_page); + + return 0; +} + static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct eb_objects *eb, @@ -255,40 +305,10 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, return -EFAULT; reloc->delta += target_offset; - if (use_cpu_reloc(obj)) { - uint32_t page_offset = offset_in_page(reloc->offset); - char *vaddr; - - ret = i915_gem_object_set_to_cpu_domain(obj, 1); - if (ret) - return ret; - - vaddr = kmap_atomic(i915_gem_object_get_page(obj, - reloc->offset >> PAGE_SHIFT)); - *(uint32_t *)(vaddr + page_offset) = reloc->delta; - kunmap_atomic(vaddr); - } else { - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t __iomem *reloc_entry; - void __iomem *reloc_page; - - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) - return ret; - - ret = i915_gem_object_put_fence(obj); - if (ret) - return ret; - - /* Map the page containing the relocation we're going to perform. */ - reloc->offset += i915_gem_obj_ggtt_offset(obj); - reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable, - reloc->offset & PAGE_MASK); - reloc_entry = (uint32_t __iomem *) - (reloc_page + offset_in_page(reloc->offset)); - iowrite32(reloc->delta, reloc_entry); - io_mapping_unmap_atomic(reloc_page); - } + if (use_cpu_reloc(obj)) + ret = relocate_entry_cpu(obj, reloc); + else + ret = relocate_entry_gtt(obj, reloc); /* and update the user's relocation entry */ reloc->presumed_offset = target_offset; @@ -708,6 +728,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, { struct drm_i915_gem_object *obj; uint32_t flush_domains = 0; + bool flush_chipset = false; int ret; list_for_each_entry(obj, objects, exec_list) { @@ -716,12 +737,12 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, return ret; if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) - i915_gem_clflush_object(obj); + flush_chipset |= i915_gem_clflush_object(obj, false); flush_domains |= obj->base.write_domain; } - if (flush_domains & I915_GEM_DOMAIN_CPU) + if (flush_chipset) i915_gem_chipset_flush(ring->dev); if (flush_domains & I915_GEM_DOMAIN_GTT) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 24fb989593f0..212f6d8c35ec 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -55,6 +55,7 @@ #define HSW_WB_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x2) #define HSW_WB_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x3) #define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb) +#define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr, enum i915_cache_level level) @@ -138,8 +139,16 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, gen6_gtt_pte_t pte = GEN6_PTE_VALID; pte |= HSW_PTE_ADDR_ENCODE(addr); - if (level != I915_CACHE_NONE) + switch (level) { + case I915_CACHE_NONE: + break; + case I915_CACHE_WT: + pte |= HSW_WT_ELLC_LLC_AGE0; + break; + default: pte |= HSW_WB_ELLC_LLC_AGE0; + break; + } return pte; } @@ -487,7 +496,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) dev_priv->gtt.base.total / PAGE_SIZE); list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, obj->pin_display); i915_gem_gtt_bind_object(obj, obj->cache_level); } diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 8912f489f53a..9969d10b80f5 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -296,9 +296,8 @@ _i915_gem_object_create_stolen(struct drm_device *dev, i915_gem_object_pin_pages(obj); obj->stolen = stolen; - obj->base.write_domain = I915_GEM_DOMAIN_GTT; - obj->base.read_domains = I915_GEM_DOMAIN_GTT; - obj->cache_level = I915_CACHE_NONE; + obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; + obj->cache_level = HAS_LLC(dev) ? I915_CACHE_LLC : I915_CACHE_NONE; return obj; @@ -410,8 +409,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, ret = drm_mm_reserve_node(&ggtt->mm, &vma->node); if (ret) { DRM_DEBUG_KMS("failed to allocate stolen GTT space\n"); - i915_gem_vma_destroy(vma); - goto err_out; + goto err_vma; } } @@ -422,6 +420,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, return obj; +err_vma: + i915_gem_vma_destroy(vma); err_out: drm_mm_remove_node(stolen); kfree(stolen); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 60393cb9a7c7..558e568d5b45 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -243,6 +243,11 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m, err_printf(m, " SYNC_1: 0x%08x [last synced 0x%08x]\n", error->semaphore_mboxes[ring][1], error->semaphore_seqno[ring][1]); + if (HAS_VEBOX(dev)) { + err_printf(m, " SYNC_2: 0x%08x [last synced 0x%08x]\n", + error->semaphore_mboxes[ring][2], + error->semaphore_seqno[ring][2]); + } } err_printf(m, " seqno: 0x%08x\n", error->seqno[ring]); err_printf(m, " waiting: %s\n", yesno(error->waiting[ring])); @@ -682,6 +687,12 @@ static void i915_record_ring_state(struct drm_device *dev, error->semaphore_seqno[ring->id][1] = ring->sync_seqno[1]; } + if (HAS_VEBOX(dev)) { + error->semaphore_mboxes[ring->id][2] = + I915_READ(RING_SYNC_2(ring->mmio_base)); + error->semaphore_seqno[ring->id][2] = ring->sync_seqno[2]; + } + if (INTEL_INFO(dev)->gen >= 4) { error->faddr[ring->id] = I915_READ(RING_DMA_FADD(ring->mmio_base)); error->ipeir[ring->id] = I915_READ(RING_IPEIR(ring->mmio_base)); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 8a77faf4927d..a03b445ceb5f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -85,6 +85,12 @@ ironlake_enable_display_irq(drm_i915_private_t *dev_priv, u32 mask) { assert_spin_locked(&dev_priv->irq_lock); + if (dev_priv->pc8.irqs_disabled) { + WARN(1, "IRQs disabled\n"); + dev_priv->pc8.regsave.deimr &= ~mask; + return; + } + if ((dev_priv->irq_mask & mask) != 0) { dev_priv->irq_mask &= ~mask; I915_WRITE(DEIMR, dev_priv->irq_mask); @@ -97,6 +103,12 @@ ironlake_disable_display_irq(drm_i915_private_t *dev_priv, u32 mask) { assert_spin_locked(&dev_priv->irq_lock); + if (dev_priv->pc8.irqs_disabled) { + WARN(1, "IRQs disabled\n"); + dev_priv->pc8.regsave.deimr |= mask; + return; + } + if ((dev_priv->irq_mask & mask) != mask) { dev_priv->irq_mask |= mask; I915_WRITE(DEIMR, dev_priv->irq_mask); @@ -104,6 +116,85 @@ ironlake_disable_display_irq(drm_i915_private_t *dev_priv, u32 mask) } } +/** + * ilk_update_gt_irq - update GTIMR + * @dev_priv: driver private + * @interrupt_mask: mask of interrupt bits to update + * @enabled_irq_mask: mask of interrupt bits to enable + */ +static void ilk_update_gt_irq(struct drm_i915_private *dev_priv, + uint32_t interrupt_mask, + uint32_t enabled_irq_mask) +{ + assert_spin_locked(&dev_priv->irq_lock); + + if (dev_priv->pc8.irqs_disabled) { + WARN(1, "IRQs disabled\n"); + dev_priv->pc8.regsave.gtimr &= ~interrupt_mask; + dev_priv->pc8.regsave.gtimr |= (~enabled_irq_mask & + interrupt_mask); + return; + } + + dev_priv->gt_irq_mask &= ~interrupt_mask; + dev_priv->gt_irq_mask |= (~enabled_irq_mask & interrupt_mask); + I915_WRITE(GTIMR, dev_priv->gt_irq_mask); + POSTING_READ(GTIMR); +} + +void ilk_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask) +{ + ilk_update_gt_irq(dev_priv, mask, mask); +} + +void ilk_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask) +{ + ilk_update_gt_irq(dev_priv, mask, 0); +} + +/** + * snb_update_pm_irq - update GEN6_PMIMR + * @dev_priv: driver private + * @interrupt_mask: mask of interrupt bits to update + * @enabled_irq_mask: mask of interrupt bits to enable + */ +static void snb_update_pm_irq(struct drm_i915_private *dev_priv, + uint32_t interrupt_mask, + uint32_t enabled_irq_mask) +{ + uint32_t new_val; + + assert_spin_locked(&dev_priv->irq_lock); + + if (dev_priv->pc8.irqs_disabled) { + WARN(1, "IRQs disabled\n"); + dev_priv->pc8.regsave.gen6_pmimr &= ~interrupt_mask; + dev_priv->pc8.regsave.gen6_pmimr |= (~enabled_irq_mask & + interrupt_mask); + return; + } + + new_val = dev_priv->pm_irq_mask; + new_val &= ~interrupt_mask; + new_val |= (~enabled_irq_mask & interrupt_mask); + + if (new_val != dev_priv->pm_irq_mask) { + dev_priv->pm_irq_mask = new_val; + I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask); + POSTING_READ(GEN6_PMIMR); + } +} + +void snb_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask) +{ + snb_update_pm_irq(dev_priv, mask, mask); +} + +void snb_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask) +{ + snb_update_pm_irq(dev_priv, mask, 0); +} + static bool ivb_can_enable_err_int(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -194,6 +285,15 @@ static void ibx_display_interrupt_update(struct drm_i915_private *dev_priv, assert_spin_locked(&dev_priv->irq_lock); + if (dev_priv->pc8.irqs_disabled && + (interrupt_mask & SDE_HOTPLUG_MASK_CPT)) { + WARN(1, "IRQs disabled\n"); + dev_priv->pc8.regsave.sdeimr &= ~interrupt_mask; + dev_priv->pc8.regsave.sdeimr |= (~enabled_irq_mask & + interrupt_mask); + return; + } + I915_WRITE(SDEIMR, sdeimr); POSTING_READ(SDEIMR); } @@ -711,17 +811,19 @@ static void gen6_pm_rps_work(struct work_struct *work) { drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t, rps.work); - u32 pm_iir, pm_imr; + u32 pm_iir; u8 new_delay; spin_lock_irq(&dev_priv->irq_lock); pm_iir = dev_priv->rps.pm_iir; dev_priv->rps.pm_iir = 0; - pm_imr = I915_READ(GEN6_PMIMR); /* Make sure not to corrupt PMIMR state used by ringbuffer code */ - I915_WRITE(GEN6_PMIMR, pm_imr & ~GEN6_PM_RPS_EVENTS); + snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS); spin_unlock_irq(&dev_priv->irq_lock); + /* Make sure we didn't queue anything we're not going to process. */ + WARN_ON(pm_iir & ~GEN6_PM_RPS_EVENTS); + if ((pm_iir & GEN6_PM_RPS_EVENTS) == 0) return; @@ -806,8 +908,7 @@ static void ivybridge_parity_work(struct work_struct *work) I915_WRITE(GEN7_MISCCPCTL, misccpctl); spin_lock_irqsave(&dev_priv->irq_lock, flags); - dev_priv->gt_irq_mask &= ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT; - I915_WRITE(GTIMR, dev_priv->gt_irq_mask); + ilk_enable_gt_irq(dev_priv, GT_RENDER_L3_PARITY_ERROR_INTERRUPT); spin_unlock_irqrestore(&dev_priv->irq_lock, flags); mutex_unlock(&dev_priv->dev->struct_mutex); @@ -837,8 +938,7 @@ static void ivybridge_parity_error_irq_handler(struct drm_device *dev) return; spin_lock(&dev_priv->irq_lock); - dev_priv->gt_irq_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT; - I915_WRITE(GTIMR, dev_priv->gt_irq_mask); + ilk_disable_gt_irq(dev_priv, GT_RENDER_L3_PARITY_ERROR_INTERRUPT); spin_unlock(&dev_priv->irq_lock); queue_work(dev_priv->wq, &dev_priv->l3_parity.error_work); @@ -879,29 +979,6 @@ static void snb_gt_irq_handler(struct drm_device *dev, ivybridge_parity_error_irq_handler(dev); } -/* Legacy way of handling PM interrupts */ -static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, - u32 pm_iir) -{ - /* - * IIR bits should never already be set because IMR should - * prevent an interrupt from being shown in IIR. The warning - * displays a case where we've unsafely cleared - * dev_priv->rps.pm_iir. Although missing an interrupt of the same - * type is not a problem, it displays a problem in the logic. - * - * The mask bit in IMR is cleared by dev_priv->rps.work. - */ - - spin_lock(&dev_priv->irq_lock); - dev_priv->rps.pm_iir |= pm_iir; - I915_WRITE(GEN6_PMIMR, dev_priv->rps.pm_iir); - POSTING_READ(GEN6_PMIMR); - spin_unlock(&dev_priv->irq_lock); - - queue_work(dev_priv->wq, &dev_priv->rps.work); -} - #define HPD_STORM_DETECT_PERIOD 1000 #define HPD_STORM_THRESHOLD 5 @@ -968,31 +1045,28 @@ static void dp_aux_irq_handler(struct drm_device *dev) wake_up_all(&dev_priv->gmbus_wait_queue); } -/* Unlike gen6_rps_irq_handler() from which this function is originally derived, - * we must be able to deal with other PM interrupts. This is complicated because - * of the way in which we use the masks to defer the RPS work (which for - * posterity is necessary because of forcewake). - */ -static void hsw_pm_irq_handler(struct drm_i915_private *dev_priv, - u32 pm_iir) +/* The RPS events need forcewake, so we add them to a work queue and mask their + * IMR bits until the work is done. Other interrupts can be processed without + * the work queue. */ +static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) { if (pm_iir & GEN6_PM_RPS_EVENTS) { spin_lock(&dev_priv->irq_lock); dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RPS_EVENTS; - I915_WRITE(GEN6_PMIMR, dev_priv->rps.pm_iir); - /* never want to mask useful interrupts. (also posting read) */ - WARN_ON(I915_READ_NOTRACE(GEN6_PMIMR) & ~GEN6_PM_RPS_EVENTS); + snb_disable_pm_irq(dev_priv, pm_iir & GEN6_PM_RPS_EVENTS); spin_unlock(&dev_priv->irq_lock); queue_work(dev_priv->wq, &dev_priv->rps.work); } - if (pm_iir & PM_VEBOX_USER_INTERRUPT) - notify_ring(dev_priv->dev, &dev_priv->ring[VECS]); + if (HAS_VEBOX(dev_priv->dev)) { + if (pm_iir & PM_VEBOX_USER_INTERRUPT) + notify_ring(dev_priv->dev, &dev_priv->ring[VECS]); - if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) { - DRM_ERROR("VEBOX CS error interrupt 0x%08x\n", pm_iir); - i915_handle_error(dev_priv->dev, false); + if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) { + DRM_ERROR("VEBOX CS error interrupt 0x%08x\n", pm_iir); + i915_handle_error(dev_priv->dev, false); + } } } @@ -1064,7 +1138,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg) if (pipe_stats[0] & PIPE_GMBUS_INTERRUPT_STATUS) gmbus_irq_handler(dev); - if (pm_iir & GEN6_PM_RPS_EVENTS) + if (pm_iir) gen6_rps_irq_handler(dev_priv, pm_iir); I915_WRITE(GTIIR, gt_iir); @@ -1309,6 +1383,7 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; u32 de_iir, gt_iir, de_ier, sde_ier = 0; irqreturn_t ret = IRQ_NONE; + bool err_int_reenable = false; atomic_inc(&dev_priv->irq_received); @@ -1337,7 +1412,9 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) * handler. */ if (IS_HASWELL(dev)) { spin_lock(&dev_priv->irq_lock); - ironlake_disable_display_irq(dev_priv, DE_ERR_INT_IVB); + err_int_reenable = ~dev_priv->irq_mask & DE_ERR_INT_IVB; + if (err_int_reenable) + ironlake_disable_display_irq(dev_priv, DE_ERR_INT_IVB); spin_unlock(&dev_priv->irq_lock); } @@ -1364,16 +1441,13 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) if (INTEL_INFO(dev)->gen >= 6) { u32 pm_iir = I915_READ(GEN6_PMIIR); if (pm_iir) { - if (IS_HASWELL(dev)) - hsw_pm_irq_handler(dev_priv, pm_iir); - else if (pm_iir & GEN6_PM_RPS_EVENTS) - gen6_rps_irq_handler(dev_priv, pm_iir); + gen6_rps_irq_handler(dev_priv, pm_iir); I915_WRITE(GEN6_PMIIR, pm_iir); ret = IRQ_HANDLED; } } - if (IS_HASWELL(dev)) { + if (err_int_reenable) { spin_lock(&dev_priv->irq_lock); if (ivb_can_enable_err_int(dev)) ironlake_enable_display_irq(dev_priv, DE_ERR_INT_IVB); @@ -1826,10 +1900,10 @@ ring_stuck(struct intel_ring_buffer *ring, u32 acthd) u32 tmp; if (ring->hangcheck.acthd != acthd) - return active; + return HANGCHECK_ACTIVE; if (IS_GEN2(dev)) - return hung; + return HANGCHECK_HUNG; /* Is the chip hanging on a WAIT_FOR_EVENT? * If so we can simply poke the RB_WAIT bit @@ -1841,24 +1915,24 @@ ring_stuck(struct intel_ring_buffer *ring, u32 acthd) DRM_ERROR("Kicking stuck wait on %s\n", ring->name); I915_WRITE_CTL(ring, tmp); - return kick; + return HANGCHECK_KICK; } if (INTEL_INFO(dev)->gen >= 6 && tmp & RING_WAIT_SEMAPHORE) { switch (semaphore_passed(ring)) { default: - return hung; + return HANGCHECK_HUNG; case 1: DRM_ERROR("Kicking stuck semaphore on %s\n", ring->name); I915_WRITE_CTL(ring, tmp); - return kick; + return HANGCHECK_KICK; case 0: - return wait; + return HANGCHECK_WAIT; } } - return hung; + return HANGCHECK_HUNG; } /** @@ -1905,8 +1979,6 @@ static void i915_hangcheck_elapsed(unsigned long data) } else busy = false; } else { - int score; - /* We always increment the hangcheck score * if the ring is busy and still processing * the same request, so that no single request @@ -1926,21 +1998,19 @@ static void i915_hangcheck_elapsed(unsigned long data) acthd); switch (ring->hangcheck.action) { - case wait: - score = 0; + case HANGCHECK_WAIT: break; - case active: - score = BUSY; + case HANGCHECK_ACTIVE: + ring->hangcheck.score += BUSY; break; - case kick: - score = KICK; + case HANGCHECK_KICK: + ring->hangcheck.score += KICK; break; - case hung: - score = HUNG; + case HANGCHECK_HUNG: + ring->hangcheck.score += HUNG; stuck[i] = true; break; } - ring->hangcheck.score += score; } } else { /* Gradually reduce the count so that we catch DoS @@ -2158,8 +2228,9 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev) if (HAS_VEBOX(dev)) pm_irqs |= PM_VEBOX_USER_INTERRUPT; + dev_priv->pm_irq_mask = 0xffffffff; I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR)); - I915_WRITE(GEN6_PMIMR, 0xffffffff); + I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask); I915_WRITE(GEN6_PMIER, pm_irqs); POSTING_READ(GEN6_PMIER); } @@ -2403,7 +2474,6 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) u16 iir, new_iir; u32 pipe_stats[2]; unsigned long irqflags; - int irq_received; int pipe; u16 flip_mask = I915_DISPLAY_PLANE_A_FLIP_PENDING_INTERRUPT | @@ -2437,7 +2507,6 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) DRM_DEBUG_DRIVER("pipe %c underrun\n", pipe_name(pipe)); I915_WRITE(reg, pipe_stats[pipe]); - irq_received = 1; } } spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); @@ -3081,3 +3150,67 @@ void intel_hpd_init(struct drm_device *dev) dev_priv->display.hpd_irq_setup(dev); spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } + +/* Disable interrupts so we can allow Package C8+. */ +void hsw_pc8_disable_interrupts(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long irqflags; + + spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + + dev_priv->pc8.regsave.deimr = I915_READ(DEIMR); + dev_priv->pc8.regsave.sdeimr = I915_READ(SDEIMR); + dev_priv->pc8.regsave.gtimr = I915_READ(GTIMR); + dev_priv->pc8.regsave.gtier = I915_READ(GTIER); + dev_priv->pc8.regsave.gen6_pmimr = I915_READ(GEN6_PMIMR); + + ironlake_disable_display_irq(dev_priv, ~DE_PCH_EVENT_IVB); + ibx_disable_display_interrupt(dev_priv, ~SDE_HOTPLUG_MASK_CPT); + ilk_disable_gt_irq(dev_priv, 0xffffffff); + snb_disable_pm_irq(dev_priv, 0xffffffff); + + dev_priv->pc8.irqs_disabled = true; + + spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); +} + +/* Restore interrupts so we can recover from Package C8+. */ +void hsw_pc8_restore_interrupts(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long irqflags; + uint32_t val, expected; + + spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + + val = I915_READ(DEIMR); + expected = ~DE_PCH_EVENT_IVB; + WARN(val != expected, "DEIMR is 0x%08x, not 0x%08x\n", val, expected); + + val = I915_READ(SDEIMR) & ~SDE_HOTPLUG_MASK_CPT; + expected = ~SDE_HOTPLUG_MASK_CPT; + WARN(val != expected, "SDEIMR non-HPD bits are 0x%08x, not 0x%08x\n", + val, expected); + + val = I915_READ(GTIMR); + expected = 0xffffffff; + WARN(val != expected, "GTIMR is 0x%08x, not 0x%08x\n", val, expected); + + val = I915_READ(GEN6_PMIMR); + expected = 0xffffffff; + WARN(val != expected, "GEN6_PMIMR is 0x%08x, not 0x%08x\n", val, + expected); + + dev_priv->pc8.irqs_disabled = false; + + ironlake_enable_display_irq(dev_priv, ~dev_priv->pc8.regsave.deimr); + ibx_enable_display_interrupt(dev_priv, + ~dev_priv->pc8.regsave.sdeimr & + ~SDE_HOTPLUG_MASK_CPT); + ilk_enable_gt_irq(dev_priv, ~dev_priv->pc8.regsave.gtimr); + snb_enable_pm_irq(dev_priv, ~dev_priv->pc8.regsave.gen6_pmimr); + I915_WRITE(GTIER, dev_priv->pc8.regsave.gtier); + + spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); +} diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 88a2c0792f26..56708c64e68f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1447,6 +1447,8 @@ #define MCH_SSKPD_WM0_MASK 0x3f #define MCH_SSKPD_WM0_VAL 0xc +#define MCH_SECP_NRG_STTS (MCHBAR_MIRROR_BASE_SNB + 0x592c) + /* Clocking configuration register */ #define CLKCFG 0x10c00 #define CLKCFG_FSB_400 (5 << 0) /* hrawclk 100 */ @@ -1703,15 +1705,26 @@ */ #define CCID 0x2180 #define CCID_EN (1<<0) +/* + * Notes on SNB/IVB/VLV context size: + * - Power context is saved elsewhere (LLC or stolen) + * - Ring/execlist context is saved on SNB, not on IVB + * - Extended context size already includes render context size + * - We always need to follow the extended context size. + * SNB BSpec has comments indicating that we should use the + * render context size instead if execlists are disabled, but + * based on empirical testing that's just nonsense. + * - Pipelined/VF state is saved on SNB/IVB respectively + * - GT1 size just indicates how much of render context + * doesn't need saving on GT1 + */ #define CXT_SIZE 0x21a0 #define GEN6_CXT_POWER_SIZE(cxt_reg) ((cxt_reg >> 24) & 0x3f) #define GEN6_CXT_RING_SIZE(cxt_reg) ((cxt_reg >> 18) & 0x3f) #define GEN6_CXT_RENDER_SIZE(cxt_reg) ((cxt_reg >> 12) & 0x3f) #define GEN6_CXT_EXTENDED_SIZE(cxt_reg) ((cxt_reg >> 6) & 0x3f) #define GEN6_CXT_PIPELINE_SIZE(cxt_reg) ((cxt_reg >> 0) & 0x3f) -#define GEN6_CXT_TOTAL_SIZE(cxt_reg) (GEN6_CXT_POWER_SIZE(cxt_reg) + \ - GEN6_CXT_RING_SIZE(cxt_reg) + \ - GEN6_CXT_RENDER_SIZE(cxt_reg) + \ +#define GEN6_CXT_TOTAL_SIZE(cxt_reg) (GEN6_CXT_RING_SIZE(cxt_reg) + \ GEN6_CXT_EXTENDED_SIZE(cxt_reg) + \ GEN6_CXT_PIPELINE_SIZE(cxt_reg)) #define GEN7_CXT_SIZE 0x21a8 @@ -1721,11 +1734,7 @@ #define GEN7_CXT_EXTENDED_SIZE(ctx_reg) ((ctx_reg >> 9) & 0x7f) #define GEN7_CXT_GT1_SIZE(ctx_reg) ((ctx_reg >> 6) & 0x7) #define GEN7_CXT_VFSTATE_SIZE(ctx_reg) ((ctx_reg >> 0) & 0x3f) -#define GEN7_CXT_TOTAL_SIZE(ctx_reg) (GEN7_CXT_POWER_SIZE(ctx_reg) + \ - GEN7_CXT_RING_SIZE(ctx_reg) + \ - GEN7_CXT_RENDER_SIZE(ctx_reg) + \ - GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \ - GEN7_CXT_GT1_SIZE(ctx_reg) + \ +#define GEN7_CXT_TOTAL_SIZE(ctx_reg) (GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \ GEN7_CXT_VFSTATE_SIZE(ctx_reg)) /* Haswell does have the CXT_SIZE register however it does not appear to be * valid. Now, docs explain in dwords what is in the context object. The full @@ -4827,8 +4836,8 @@ #define HSW_PWR_WELL_DRIVER 0x45404 /* CTL2 */ #define HSW_PWR_WELL_KVMR 0x45408 /* CTL3 */ #define HSW_PWR_WELL_DEBUG 0x4540C /* CTL4 */ -#define HSW_PWR_WELL_ENABLE (1<<31) -#define HSW_PWR_WELL_STATE (1<<30) +#define HSW_PWR_WELL_ENABLE_REQUEST (1<<31) +#define HSW_PWR_WELL_STATE_ENABLED (1<<30) #define HSW_PWR_WELL_CTL5 0x45410 #define HSW_PWR_WELL_ENABLE_SINGLE_STEP (1<<31) #define HSW_PWR_WELL_PWR_GATE_OVERRIDE (1<<20) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index b8c096b4a1de..63aca49d11a8 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1139,10 +1139,13 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder) int intel_ddi_get_cdclk_freq(struct drm_i915_private *dev_priv) { - if (I915_READ(HSW_FUSE_STRAP) & HSW_CDCLK_LIMIT) + uint32_t lcpll = I915_READ(LCPLL_CTL); + + if (lcpll & LCPLL_CD_SOURCE_FCLK) + return 800000; + else if (I915_READ(HSW_FUSE_STRAP) & HSW_CDCLK_LIMIT) return 450000; - else if ((I915_READ(LCPLL_CTL) & LCPLL_CLK_FREQ_MASK) == - LCPLL_CLK_FREQ_450) + else if ((lcpll & LCPLL_CLK_FREQ_MASK) == LCPLL_CLK_FREQ_450) return 450000; else if (IS_ULT(dev_priv->dev)) return 337500; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b52f374d0f00..10c1db596387 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -690,7 +690,7 @@ vlv_find_best_dpll(const intel_limit_t *limit, struct drm_crtc *crtc, { u32 p1, p2, m1, m2, vco, bestn, bestm1, bestm2, bestp1, bestp2; u32 m, n, fastclk; - u32 updrate, minupdate, fracbits, p; + u32 updrate, minupdate, p; unsigned long bestppm, ppm, absppm; int dotclk, flag; @@ -701,7 +701,6 @@ vlv_find_best_dpll(const intel_limit_t *limit, struct drm_crtc *crtc, fastclk = dotclk / (2*100); updrate = 0; minupdate = 19200; - fracbits = 1; n = p = p1 = p2 = m = m1 = m2 = vco = bestn = 0; bestm1 = bestm2 = bestp1 = bestp2 = 0; @@ -1877,7 +1876,7 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev, return 0; err_unpin: - i915_gem_object_unpin(obj); + i915_gem_object_unpin_from_display_plane(obj); err_interruptible: dev_priv->mm.interruptible = true; return ret; @@ -1886,7 +1885,7 @@ err_interruptible: void intel_unpin_fb_obj(struct drm_i915_gem_object *obj) { i915_gem_object_unpin_fence(obj); - i915_gem_object_unpin(obj); + i915_gem_object_unpin_from_display_plane(obj); } /* Computes the linear offset to the base tile and adjusts x, y. bytes per pixel @@ -2598,7 +2597,7 @@ static void ivb_manual_fdi_link_train(struct drm_crtc *crtc) struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; - u32 reg, temp, i; + u32 reg, temp, i, j; /* Train 1: umask FDI RX Interrupt symbol_lock and bit_lock bit for train result */ @@ -2614,97 +2613,99 @@ static void ivb_manual_fdi_link_train(struct drm_crtc *crtc) DRM_DEBUG_KMS("FDI_RX_IIR before link train 0x%x\n", I915_READ(FDI_RX_IIR(pipe))); - /* enable CPU FDI TX and PCH FDI RX */ - reg = FDI_TX_CTL(pipe); - temp = I915_READ(reg); - temp &= ~FDI_DP_PORT_WIDTH_MASK; - temp |= FDI_DP_PORT_WIDTH(intel_crtc->config.fdi_lanes); - temp &= ~(FDI_LINK_TRAIN_AUTO | FDI_LINK_TRAIN_NONE_IVB); - temp |= FDI_LINK_TRAIN_PATTERN_1_IVB; - temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; - temp |= FDI_LINK_TRAIN_400MV_0DB_SNB_B; - temp |= FDI_COMPOSITE_SYNC; - I915_WRITE(reg, temp | FDI_TX_ENABLE); - - I915_WRITE(FDI_RX_MISC(pipe), - FDI_RX_TP1_TO_TP2_48 | FDI_RX_FDI_DELAY_90); - - reg = FDI_RX_CTL(pipe); - temp = I915_READ(reg); - temp &= ~FDI_LINK_TRAIN_AUTO; - temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; - temp |= FDI_LINK_TRAIN_PATTERN_1_CPT; - temp |= FDI_COMPOSITE_SYNC; - I915_WRITE(reg, temp | FDI_RX_ENABLE); - - POSTING_READ(reg); - udelay(150); - - for (i = 0; i < 4; i++) { + /* Try each vswing and preemphasis setting twice before moving on */ + for (j = 0; j < ARRAY_SIZE(snb_b_fdi_train_param) * 2; j++) { + /* disable first in case we need to retry */ reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); + temp &= ~(FDI_LINK_TRAIN_AUTO | FDI_LINK_TRAIN_NONE_IVB); + temp &= ~FDI_TX_ENABLE; + I915_WRITE(reg, temp); + + reg = FDI_RX_CTL(pipe); + temp = I915_READ(reg); + temp &= ~FDI_LINK_TRAIN_AUTO; + temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; + temp &= ~FDI_RX_ENABLE; + I915_WRITE(reg, temp); + + /* enable CPU FDI TX and PCH FDI RX */ + reg = FDI_TX_CTL(pipe); + temp = I915_READ(reg); + temp &= ~FDI_DP_PORT_WIDTH_MASK; + temp |= FDI_DP_PORT_WIDTH(intel_crtc->config.fdi_lanes); + temp |= FDI_LINK_TRAIN_PATTERN_1_IVB; temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; - temp |= snb_b_fdi_train_param[i]; + temp |= snb_b_fdi_train_param[j/2]; + temp |= FDI_COMPOSITE_SYNC; + I915_WRITE(reg, temp | FDI_TX_ENABLE); + + I915_WRITE(FDI_RX_MISC(pipe), + FDI_RX_TP1_TO_TP2_48 | FDI_RX_FDI_DELAY_90); + + reg = FDI_RX_CTL(pipe); + temp = I915_READ(reg); + temp |= FDI_LINK_TRAIN_PATTERN_1_CPT; + temp |= FDI_COMPOSITE_SYNC; + I915_WRITE(reg, temp | FDI_RX_ENABLE); + + POSTING_READ(reg); + udelay(1); /* should be 0.5us */ + + for (i = 0; i < 4; i++) { + reg = FDI_RX_IIR(pipe); + temp = I915_READ(reg); + DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp); + + if (temp & FDI_RX_BIT_LOCK || + (I915_READ(reg) & FDI_RX_BIT_LOCK)) { + I915_WRITE(reg, temp | FDI_RX_BIT_LOCK); + DRM_DEBUG_KMS("FDI train 1 done, level %i.\n", + i); + break; + } + udelay(1); /* should be 0.5us */ + } + if (i == 4) { + DRM_DEBUG_KMS("FDI train 1 fail on vswing %d\n", j / 2); + continue; + } + + /* Train 2 */ + reg = FDI_TX_CTL(pipe); + temp = I915_READ(reg); + temp &= ~FDI_LINK_TRAIN_NONE_IVB; + temp |= FDI_LINK_TRAIN_PATTERN_2_IVB; + I915_WRITE(reg, temp); + + reg = FDI_RX_CTL(pipe); + temp = I915_READ(reg); + temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; + temp |= FDI_LINK_TRAIN_PATTERN_2_CPT; I915_WRITE(reg, temp); POSTING_READ(reg); - udelay(500); + udelay(2); /* should be 1.5us */ - reg = FDI_RX_IIR(pipe); - temp = I915_READ(reg); - DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp); + for (i = 0; i < 4; i++) { + reg = FDI_RX_IIR(pipe); + temp = I915_READ(reg); + DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp); - if (temp & FDI_RX_BIT_LOCK || - (I915_READ(reg) & FDI_RX_BIT_LOCK)) { - I915_WRITE(reg, temp | FDI_RX_BIT_LOCK); - DRM_DEBUG_KMS("FDI train 1 done, level %i.\n", i); - break; + if (temp & FDI_RX_SYMBOL_LOCK || + (I915_READ(reg) & FDI_RX_SYMBOL_LOCK)) { + I915_WRITE(reg, temp | FDI_RX_SYMBOL_LOCK); + DRM_DEBUG_KMS("FDI train 2 done, level %i.\n", + i); + goto train_done; + } + udelay(2); /* should be 1.5us */ } + if (i == 4) + DRM_DEBUG_KMS("FDI train 2 fail on vswing %d\n", j / 2); } - if (i == 4) - DRM_ERROR("FDI train 1 fail!\n"); - - /* Train 2 */ - reg = FDI_TX_CTL(pipe); - temp = I915_READ(reg); - temp &= ~FDI_LINK_TRAIN_NONE_IVB; - temp |= FDI_LINK_TRAIN_PATTERN_2_IVB; - temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; - temp |= FDI_LINK_TRAIN_400MV_0DB_SNB_B; - I915_WRITE(reg, temp); - - reg = FDI_RX_CTL(pipe); - temp = I915_READ(reg); - temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; - temp |= FDI_LINK_TRAIN_PATTERN_2_CPT; - I915_WRITE(reg, temp); - - POSTING_READ(reg); - udelay(150); - - for (i = 0; i < 4; i++) { - reg = FDI_TX_CTL(pipe); - temp = I915_READ(reg); - temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; - temp |= snb_b_fdi_train_param[i]; - I915_WRITE(reg, temp); - - POSTING_READ(reg); - udelay(500); - - reg = FDI_RX_IIR(pipe); - temp = I915_READ(reg); - DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp); - - if (temp & FDI_RX_SYMBOL_LOCK) { - I915_WRITE(reg, temp | FDI_RX_SYMBOL_LOCK); - DRM_DEBUG_KMS("FDI train 2 done, level %i.\n", i); - break; - } - } - if (i == 4) - DRM_ERROR("FDI train 2 fail!\n"); +train_done: DRM_DEBUG_KMS("FDI train done.\n"); } @@ -4423,13 +4424,10 @@ static void vlv_update_pll(struct intel_crtc *crtc) int pipe = crtc->pipe; u32 dpll, mdiv; u32 bestn, bestm1, bestm2, bestp1, bestp2; - bool is_hdmi; u32 coreclk, reg_val, dpll_md; mutex_lock(&dev_priv->dpio_lock); - is_hdmi = intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_HDMI); - bestn = crtc->config.dpll.n; bestm1 = crtc->config.dpll.m1; bestm2 = crtc->config.dpll.m2; @@ -5934,11 +5932,7 @@ static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv) struct intel_ddi_plls *plls = &dev_priv->ddi_plls; struct intel_crtc *crtc; unsigned long irqflags; - uint32_t val, pch_hpd_mask; - - pch_hpd_mask = SDE_PORTB_HOTPLUG_CPT | SDE_PORTC_HOTPLUG_CPT; - if (!(dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE)) - pch_hpd_mask |= SDE_PORTD_HOTPLUG_CPT | SDE_CRT_HOTPLUG_CPT; + uint32_t val; list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) WARN(crtc->base.enabled, "CRTC for pipe %c enabled\n", @@ -5964,7 +5958,7 @@ static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv) WARN((val & ~DE_PCH_EVENT_IVB) != val, "Unexpected DEIMR bits enabled: 0x%x\n", val); val = I915_READ(SDEIMR); - WARN((val & ~pch_hpd_mask) != val, + WARN((val | SDE_HOTPLUG_MASK_CPT) != 0xffffffff, "Unexpected SDEIMR bits enabled: 0x%x\n", val); spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } @@ -6035,16 +6029,21 @@ void hsw_restore_lcpll(struct drm_i915_private *dev_priv) LCPLL_POWER_DOWN_ALLOW)) == LCPLL_PLL_LOCK) return; + /* Make sure we're not on PC8 state before disabling PC8, otherwise + * we'll hang the machine! */ + dev_priv->uncore.funcs.force_wake_get(dev_priv); + if (val & LCPLL_POWER_DOWN_ALLOW) { val &= ~LCPLL_POWER_DOWN_ALLOW; I915_WRITE(LCPLL_CTL, val); + POSTING_READ(LCPLL_CTL); } val = I915_READ(D_COMP); val |= D_COMP_COMP_FORCE; val &= ~D_COMP_COMP_DISABLE; I915_WRITE(D_COMP, val); - I915_READ(D_COMP); + POSTING_READ(D_COMP); val = I915_READ(LCPLL_CTL); val &= ~LCPLL_PLL_DISABLE; @@ -6062,6 +6061,168 @@ void hsw_restore_lcpll(struct drm_i915_private *dev_priv) LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) DRM_ERROR("Switching back to LCPLL failed\n"); } + + dev_priv->uncore.funcs.force_wake_put(dev_priv); +} + +void hsw_enable_pc8_work(struct work_struct *__work) +{ + struct drm_i915_private *dev_priv = + container_of(to_delayed_work(__work), struct drm_i915_private, + pc8.enable_work); + struct drm_device *dev = dev_priv->dev; + uint32_t val; + + if (dev_priv->pc8.enabled) + return; + + DRM_DEBUG_KMS("Enabling package C8+\n"); + + dev_priv->pc8.enabled = true; + + if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) { + val = I915_READ(SOUTH_DSPCLK_GATE_D); + val &= ~PCH_LP_PARTITION_LEVEL_DISABLE; + I915_WRITE(SOUTH_DSPCLK_GATE_D, val); + } + + lpt_disable_clkout_dp(dev); + hsw_pc8_disable_interrupts(dev); + hsw_disable_lcpll(dev_priv, true, true); +} + +static void __hsw_enable_package_c8(struct drm_i915_private *dev_priv) +{ + WARN_ON(!mutex_is_locked(&dev_priv->pc8.lock)); + WARN(dev_priv->pc8.disable_count < 1, + "pc8.disable_count: %d\n", dev_priv->pc8.disable_count); + + dev_priv->pc8.disable_count--; + if (dev_priv->pc8.disable_count != 0) + return; + + schedule_delayed_work(&dev_priv->pc8.enable_work, + msecs_to_jiffies(i915_pc8_timeout)); +} + +static void __hsw_disable_package_c8(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = dev_priv->dev; + uint32_t val; + + WARN_ON(!mutex_is_locked(&dev_priv->pc8.lock)); + WARN(dev_priv->pc8.disable_count < 0, + "pc8.disable_count: %d\n", dev_priv->pc8.disable_count); + + dev_priv->pc8.disable_count++; + if (dev_priv->pc8.disable_count != 1) + return; + + cancel_delayed_work_sync(&dev_priv->pc8.enable_work); + if (!dev_priv->pc8.enabled) + return; + + DRM_DEBUG_KMS("Disabling package C8+\n"); + + hsw_restore_lcpll(dev_priv); + hsw_pc8_restore_interrupts(dev); + lpt_init_pch_refclk(dev); + + if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) { + val = I915_READ(SOUTH_DSPCLK_GATE_D); + val |= PCH_LP_PARTITION_LEVEL_DISABLE; + I915_WRITE(SOUTH_DSPCLK_GATE_D, val); + } + + intel_prepare_ddi(dev); + i915_gem_init_swizzling(dev); + mutex_lock(&dev_priv->rps.hw_lock); + gen6_update_ring_freq(dev); + mutex_unlock(&dev_priv->rps.hw_lock); + dev_priv->pc8.enabled = false; +} + +void hsw_enable_package_c8(struct drm_i915_private *dev_priv) +{ + mutex_lock(&dev_priv->pc8.lock); + __hsw_enable_package_c8(dev_priv); + mutex_unlock(&dev_priv->pc8.lock); +} + +void hsw_disable_package_c8(struct drm_i915_private *dev_priv) +{ + mutex_lock(&dev_priv->pc8.lock); + __hsw_disable_package_c8(dev_priv); + mutex_unlock(&dev_priv->pc8.lock); +} + +static bool hsw_can_enable_package_c8(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = dev_priv->dev; + struct intel_crtc *crtc; + uint32_t val; + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) + if (crtc->base.enabled) + return false; + + /* This case is still possible since we have the i915.disable_power_well + * parameter and also the KVMr or something else might be requesting the + * power well. */ + val = I915_READ(HSW_PWR_WELL_DRIVER); + if (val != 0) { + DRM_DEBUG_KMS("Not enabling PC8: power well on\n"); + return false; + } + + return true; +} + +/* Since we're called from modeset_global_resources there's no way to + * symmetrically increase and decrease the refcount, so we use + * dev_priv->pc8.requirements_met to track whether we already have the refcount + * or not. + */ +static void hsw_update_package_c8(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + bool allow; + + if (!i915_enable_pc8) + return; + + mutex_lock(&dev_priv->pc8.lock); + + allow = hsw_can_enable_package_c8(dev_priv); + + if (allow == dev_priv->pc8.requirements_met) + goto done; + + dev_priv->pc8.requirements_met = allow; + + if (allow) + __hsw_enable_package_c8(dev_priv); + else + __hsw_disable_package_c8(dev_priv); + +done: + mutex_unlock(&dev_priv->pc8.lock); +} + +static void hsw_package_c8_gpu_idle(struct drm_i915_private *dev_priv) +{ + if (!dev_priv->pc8.gpu_idle) { + dev_priv->pc8.gpu_idle = true; + hsw_enable_package_c8(dev_priv); + } +} + +static void hsw_package_c8_gpu_busy(struct drm_i915_private *dev_priv) +{ + if (dev_priv->pc8.gpu_idle) { + dev_priv->pc8.gpu_idle = false; + hsw_disable_package_c8(dev_priv); + } } static void haswell_modeset_global_resources(struct drm_device *dev) @@ -6079,6 +6240,8 @@ static void haswell_modeset_global_resources(struct drm_device *dev) } intel_set_power_well(dev, enable); + + hsw_update_package_c8(dev); } static int haswell_crtc_mode_set(struct drm_crtc *crtc, @@ -6759,7 +6922,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, if (intel_crtc->cursor_bo != obj) i915_gem_detach_phys_object(dev, intel_crtc->cursor_bo); } else - i915_gem_object_unpin(intel_crtc->cursor_bo); + i915_gem_object_unpin_from_display_plane(intel_crtc->cursor_bo); drm_gem_object_unreference(&intel_crtc->cursor_bo->base); } @@ -6774,7 +6937,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, return 0; fail_unpin: - i915_gem_object_unpin(obj); + i915_gem_object_unpin_from_display_plane(obj); fail_locked: mutex_unlock(&dev->struct_mutex); fail: @@ -7310,13 +7473,19 @@ static void intel_decrease_pllclock(struct drm_crtc *crtc) void intel_mark_busy(struct drm_device *dev) { - i915_update_gfx_val(dev->dev_private); + struct drm_i915_private *dev_priv = dev->dev_private; + + hsw_package_c8_gpu_busy(dev_priv); + i915_update_gfx_val(dev_priv); } void intel_mark_idle(struct drm_device *dev) { + struct drm_i915_private *dev_priv = dev->dev_private; struct drm_crtc *crtc; + hsw_package_c8_gpu_idle(dev_priv); + if (!i915_powersave) return; @@ -8891,6 +9060,9 @@ intel_set_config_compute_mode_changes(struct drm_mode_set *set, drm_mode_debug_printmodeline(set->mode); config->mode_changed = true; } + + DRM_DEBUG_KMS("computed changes for [CRTC:%d], mode_changed=%d, fb_changed=%d\n", + set->crtc->base.id, config->mode_changed, config->fb_changed); } static int @@ -8901,14 +9073,13 @@ intel_modeset_stage_output_state(struct drm_device *dev, struct drm_crtc *new_crtc; struct intel_connector *connector; struct intel_encoder *encoder; - int count, ro; + int ro; /* The upper layers ensure that we either disable a crtc or have a list * of connectors. For paranoia, double-check this. */ WARN_ON(!set->fb && (set->num_connectors != 0)); WARN_ON(set->fb && (set->num_connectors == 0)); - count = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, base.head) { /* Otherwise traverse passed in connector list and get encoders @@ -8942,7 +9113,6 @@ intel_modeset_stage_output_state(struct drm_device *dev, /* connector->new_encoder is now updated for all connectors. */ /* Update crtc of enabled connectors. */ - count = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, base.head) { if (!connector->new_encoder) @@ -10114,6 +10284,17 @@ void i915_redisable_vga(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; u32 vga_reg = i915_vgacntrl_reg(dev); + /* This function can be called both from intel_modeset_setup_hw_state or + * at a very early point in our resume sequence, where the power well + * structures are not yet restored. Since this function is at a very + * paranoid "someone might have enabled VGA while we were not looking" + * level, just check if the power well is enabled instead of trying to + * follow the "don't touch the power well if we don't need it" policy + * the rest of the driver uses. */ + if (HAS_POWER_WELL(dev) && + (I915_READ(HSW_PWR_WELL_DRIVER) & HSW_PWR_WELL_STATE_ENABLED) == 0) + return; + if (I915_READ(vga_reg) != VGA_DISP_DISABLE) { DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n"); i915_disable_vga(dev); @@ -10302,7 +10483,6 @@ void intel_modeset_cleanup(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_crtc *crtc; - struct intel_crtc *intel_crtc; /* * Interrupts and polling as the first thing to avoid creating havoc. @@ -10326,7 +10506,6 @@ void intel_modeset_cleanup(struct drm_device *dev) if (!crtc->fb) continue; - intel_crtc = to_intel_crtc(crtc); intel_increase_pllclock(crtc); } diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 63b6722d4285..2151d13772b8 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -344,6 +344,8 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, else precharge = 5; + intel_aux_display_runtime_get(dev_priv); + /* Try to wait for any previous AUX channel activity */ for (try = 0; try < 3; try++) { status = I915_READ_NOTRACE(ch_ctl); @@ -434,6 +436,7 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, ret = recv_bytes; out: pm_qos_update_request(&dev_priv->pm_qos, PM_QOS_DEFAULT_VALUE); + intel_aux_display_runtime_put(dev_priv); return ret; } @@ -2326,7 +2329,6 @@ intel_dp_start_link_train(struct intel_dp *intel_dp) struct drm_device *dev = encoder->dev; int i; uint8_t voltage; - bool clock_recovery = false; int voltage_tries, loop_tries; uint32_t DP = intel_dp->DP; @@ -2344,7 +2346,6 @@ intel_dp_start_link_train(struct intel_dp *intel_dp) voltage = 0xff; voltage_tries = 0; loop_tries = 0; - clock_recovery = false; for (;;) { /* Use intel_dp->train_set[0] to set the voltage and pre emphasis values */ uint8_t link_status[DP_LINK_STATUS_SIZE]; @@ -2365,7 +2366,6 @@ intel_dp_start_link_train(struct intel_dp *intel_dp) if (drm_dp_clock_recovery_ok(link_status, intel_dp->lane_count)) { DRM_DEBUG_KMS("clock recovery OK\n"); - clock_recovery = true; break; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 01455aa8b8bb..176080822a74 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -745,6 +745,7 @@ extern void intel_set_power_well(struct drm_device *dev, bool enable); extern void intel_enable_gt_powersave(struct drm_device *dev); extern void intel_disable_gt_powersave(struct drm_device *dev); extern void ironlake_teardown_rc6(struct drm_device *dev); +void gen6_update_ring_freq(struct drm_device *dev); extern bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe); @@ -778,5 +779,18 @@ extern void intel_edp_psr_update(struct drm_device *dev); extern void hsw_disable_lcpll(struct drm_i915_private *dev_priv, bool switch_to_fclk, bool allow_power_down); extern void hsw_restore_lcpll(struct drm_i915_private *dev_priv); +extern void ilk_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); +extern void ilk_disable_gt_irq(struct drm_i915_private *dev_priv, + uint32_t mask); +extern void snb_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask); +extern void snb_disable_pm_irq(struct drm_i915_private *dev_priv, + uint32_t mask); +extern void hsw_enable_pc8_work(struct work_struct *__work); +extern void hsw_enable_package_c8(struct drm_i915_private *dev_priv); +extern void hsw_disable_package_c8(struct drm_i915_private *dev_priv); +extern void hsw_pc8_disable_interrupts(struct drm_device *dev); +extern void hsw_pc8_restore_interrupts(struct drm_device *dev); +extern void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv); +extern void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv); #endif /* __INTEL_DRV_H__ */ diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index f27b91eeeb64..4148cc85bf7f 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1273,7 +1273,6 @@ void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port) { struct intel_digital_port *intel_dig_port; struct intel_encoder *intel_encoder; - struct drm_encoder *encoder; struct intel_connector *intel_connector; intel_dig_port = kzalloc(sizeof(struct intel_digital_port), GFP_KERNEL); @@ -1287,7 +1286,6 @@ void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port) } intel_encoder = &intel_dig_port->base; - encoder = &intel_encoder->base; drm_encoder_init(dev, &intel_encoder->base, &intel_hdmi_enc_funcs, DRM_MODE_ENCODER_TMDS); diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index 639fe192997c..d1c1e0f7f262 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -398,6 +398,7 @@ gmbus_xfer(struct i2c_adapter *adapter, int i, reg_offset; int ret = 0; + intel_aux_display_runtime_get(dev_priv); mutex_lock(&dev_priv->gmbus_mutex); if (bus->force_bit) { @@ -497,6 +498,7 @@ timeout: out: mutex_unlock(&dev_priv->gmbus_mutex); + intel_aux_display_runtime_put(dev_priv); return ret; } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3ac5fe9d428a..0150ba598bf0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3450,11 +3450,11 @@ static void gen6_enable_rps_interrupts(struct drm_device *dev) spin_lock_irq(&dev_priv->irq_lock); WARN_ON(dev_priv->rps.pm_iir); - I915_WRITE(GEN6_PMIMR, I915_READ(GEN6_PMIMR) & ~GEN6_PM_RPS_EVENTS); + snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS); I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS); spin_unlock_irq(&dev_priv->irq_lock); - /* unmask all PM interrupts */ - I915_WRITE(GEN6_PMINTRMSK, 0); + /* only unmask PM interrupts we need. Mask all others. */ + I915_WRITE(GEN6_PMINTRMSK, ~GEN6_PM_RPS_EVENTS); } static void gen6_enable_rps(struct drm_device *dev) @@ -3508,7 +3508,10 @@ static void gen6_enable_rps(struct drm_device *dev) I915_WRITE(GEN6_RC_SLEEP, 0); I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); - I915_WRITE(GEN6_RC6_THRESHOLD, 50000); + if (INTEL_INFO(dev)->gen <= 6 || IS_IVYBRIDGE(dev)) + I915_WRITE(GEN6_RC6_THRESHOLD, 125000); + else + I915_WRITE(GEN6_RC6_THRESHOLD, 50000); I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ @@ -3604,7 +3607,7 @@ static void gen6_enable_rps(struct drm_device *dev) gen6_gt_force_wake_put(dev_priv); } -static void gen6_update_ring_freq(struct drm_device *dev) +void gen6_update_ring_freq(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; int min_freq = 15; @@ -4861,10 +4864,6 @@ static void gen6_init_clock_gating(struct drm_device *dev) ILK_DPARBUNIT_CLOCK_GATE_ENABLE | ILK_DPFDUNIT_CLOCK_GATE_ENABLE); - /* WaMbcDriverBootEnable:snb */ - I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) | - GEN6_MBCTL_ENABLE_BOOT_FETCH); - g4x_disable_trickle_feed(dev); /* The default value should be 0x200 according to docs, but the two @@ -4960,10 +4959,6 @@ static void haswell_init_clock_gating(struct drm_device *dev) I915_WRITE(CACHE_MODE_1, _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); - /* WaMbcDriverBootEnable:hsw */ - I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) | - GEN6_MBCTL_ENABLE_BOOT_FETCH); - /* WaSwitchSolVfFArbitrationPriority:hsw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); @@ -5047,10 +5042,6 @@ static void ivybridge_init_clock_gating(struct drm_device *dev) g4x_disable_trickle_feed(dev); - /* WaMbcDriverBootEnable:ivb */ - I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) | - GEN6_MBCTL_ENABLE_BOOT_FETCH); - /* WaVSRefCountFullforceMissDisable:ivb */ gen7_setup_fixed_func_scheduler(dev_priv); @@ -5110,11 +5101,6 @@ static void valleyview_init_clock_gating(struct drm_device *dev) I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); - /* WaMbcDriverBootEnable:vlv */ - I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) | - GEN6_MBCTL_ENABLE_BOOT_FETCH); - - /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock * gating disable must be set. Failure to set it results in * flickering pixels due to Z write ordering failures after @@ -5282,7 +5268,7 @@ bool intel_display_power_enabled(struct drm_device *dev, case POWER_DOMAIN_TRANSCODER_B: case POWER_DOMAIN_TRANSCODER_C: return I915_READ(HSW_PWR_WELL_DRIVER) == - (HSW_PWR_WELL_ENABLE | HSW_PWR_WELL_STATE); + (HSW_PWR_WELL_ENABLE_REQUEST | HSW_PWR_WELL_STATE_ENABLED); default: BUG(); } @@ -5295,17 +5281,18 @@ static void __intel_set_power_well(struct drm_device *dev, bool enable) uint32_t tmp; tmp = I915_READ(HSW_PWR_WELL_DRIVER); - is_enabled = tmp & HSW_PWR_WELL_STATE; - enable_requested = tmp & HSW_PWR_WELL_ENABLE; + is_enabled = tmp & HSW_PWR_WELL_STATE_ENABLED; + enable_requested = tmp & HSW_PWR_WELL_ENABLE_REQUEST; if (enable) { if (!enable_requested) - I915_WRITE(HSW_PWR_WELL_DRIVER, HSW_PWR_WELL_ENABLE); + I915_WRITE(HSW_PWR_WELL_DRIVER, + HSW_PWR_WELL_ENABLE_REQUEST); if (!is_enabled) { DRM_DEBUG_KMS("Enabling power well\n"); if (wait_for((I915_READ(HSW_PWR_WELL_DRIVER) & - HSW_PWR_WELL_STATE), 20)) + HSW_PWR_WELL_STATE_ENABLED), 20)) DRM_ERROR("Timeout enabling power well\n"); } } else { @@ -5407,10 +5394,21 @@ void intel_init_power_well(struct drm_device *dev) /* We're taking over the BIOS, so clear any requests made by it since * the driver is in charge now. */ - if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE) + if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST) I915_WRITE(HSW_PWR_WELL_BIOS, 0); } +/* Disables PC8 so we can use the GMBUS and DP AUX interrupts. */ +void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv) +{ + hsw_disable_package_c8(dev_priv); +} + +void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv) +{ + hsw_enable_package_c8(dev_priv); +} + /* Set up chip specific power management-related functions */ void intel_init_pm(struct drm_device *dev) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 74d02a704515..7de29d40d1ad 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -836,11 +836,8 @@ gen5_ring_get_irq(struct intel_ring_buffer *ring) return false; spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (ring->irq_refcount++ == 0) { - dev_priv->gt_irq_mask &= ~ring->irq_enable_mask; - I915_WRITE(GTIMR, dev_priv->gt_irq_mask); - POSTING_READ(GTIMR); - } + if (ring->irq_refcount++ == 0) + ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask); spin_unlock_irqrestore(&dev_priv->irq_lock, flags); return true; @@ -854,11 +851,8 @@ gen5_ring_put_irq(struct intel_ring_buffer *ring) unsigned long flags; spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (--ring->irq_refcount == 0) { - dev_priv->gt_irq_mask |= ring->irq_enable_mask; - I915_WRITE(GTIMR, dev_priv->gt_irq_mask); - POSTING_READ(GTIMR); - } + if (--ring->irq_refcount == 0) + ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask); spin_unlock_irqrestore(&dev_priv->irq_lock, flags); } @@ -1028,9 +1022,7 @@ gen6_ring_get_irq(struct intel_ring_buffer *ring) GT_RENDER_L3_PARITY_ERROR_INTERRUPT)); else I915_WRITE_IMR(ring, ~ring->irq_enable_mask); - dev_priv->gt_irq_mask &= ~ring->irq_enable_mask; - I915_WRITE(GTIMR, dev_priv->gt_irq_mask); - POSTING_READ(GTIMR); + ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask); } spin_unlock_irqrestore(&dev_priv->irq_lock, flags); @@ -1051,9 +1043,7 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring) ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT); else I915_WRITE_IMR(ring, ~0); - dev_priv->gt_irq_mask |= ring->irq_enable_mask; - I915_WRITE(GTIMR, dev_priv->gt_irq_mask); - POSTING_READ(GTIMR); + ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask); } spin_unlock_irqrestore(&dev_priv->irq_lock, flags); @@ -1072,10 +1062,8 @@ hsw_vebox_get_irq(struct intel_ring_buffer *ring) spin_lock_irqsave(&dev_priv->irq_lock, flags); if (ring->irq_refcount++ == 0) { - u32 pm_imr = I915_READ(GEN6_PMIMR); I915_WRITE_IMR(ring, ~ring->irq_enable_mask); - I915_WRITE(GEN6_PMIMR, pm_imr & ~ring->irq_enable_mask); - POSTING_READ(GEN6_PMIMR); + snb_enable_pm_irq(dev_priv, ring->irq_enable_mask); } spin_unlock_irqrestore(&dev_priv->irq_lock, flags); @@ -1094,10 +1082,8 @@ hsw_vebox_put_irq(struct intel_ring_buffer *ring) spin_lock_irqsave(&dev_priv->irq_lock, flags); if (--ring->irq_refcount == 0) { - u32 pm_imr = I915_READ(GEN6_PMIMR); I915_WRITE_IMR(ring, ~0); - I915_WRITE(GEN6_PMIMR, pm_imr | ring->irq_enable_mask); - POSTING_READ(GEN6_PMIMR); + snb_disable_pm_irq(dev_priv, ring->irq_enable_mask); } spin_unlock_irqrestore(&dev_priv->irq_lock, flags); } @@ -1594,6 +1580,8 @@ void intel_ring_init_seqno(struct intel_ring_buffer *ring, u32 seqno) if (INTEL_INFO(ring->dev)->gen >= 6) { I915_WRITE(RING_SYNC_0(ring->mmio_base), 0); I915_WRITE(RING_SYNC_1(ring->mmio_base), 0); + if (HAS_VEBOX(ring->dev)) + I915_WRITE(RING_SYNC_2(ring->mmio_base), 0); } ring->set_seqno(ring, seqno); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 6e38256d41e1..432ad5311ba6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -33,11 +33,12 @@ struct intel_hw_status_page { #define I915_READ_IMR(ring) I915_READ(RING_IMR((ring)->mmio_base)) #define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val) -#define I915_READ_NOPID(ring) I915_READ(RING_NOPID((ring)->mmio_base)) -#define I915_READ_SYNC_0(ring) I915_READ(RING_SYNC_0((ring)->mmio_base)) -#define I915_READ_SYNC_1(ring) I915_READ(RING_SYNC_1((ring)->mmio_base)) - -enum intel_ring_hangcheck_action { wait, active, kick, hung }; +enum intel_ring_hangcheck_action { + HANGCHECK_WAIT, + HANGCHECK_ACTIVE, + HANGCHECK_KICK, + HANGCHECK_HUNG, +}; struct intel_ring_hangcheck { bool deadlock; diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 02f220b4e4a1..317e058fb3cf 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -538,7 +538,8 @@ static bool intel_sdvo_read_response(struct intel_sdvo *intel_sdvo, &status)) goto log_fail; - while (status == SDVO_CMD_STATUS_PENDING && --retry) { + while ((status == SDVO_CMD_STATUS_PENDING || + status == SDVO_CMD_STATUS_TARGET_NOT_SPECIFIED) && --retry) { if (retry < 10) msleep(15); else diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index a1a7b6bd60d8..55bb5729bd78 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -334,6 +334,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_PINNED_BATCHES 24 #define I915_PARAM_HAS_EXEC_NO_RELOC 25 #define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 +#define I915_PARAM_HAS_WT 27 typedef struct drm_i915_getparam { int param; @@ -768,8 +769,32 @@ struct drm_i915_gem_busy { __u32 busy; }; +/** + * I915_CACHING_NONE + * + * GPU access is not coherent with cpu caches. Default for machines without an + * LLC. + */ #define I915_CACHING_NONE 0 +/** + * I915_CACHING_CACHED + * + * GPU access is coherent with cpu caches and furthermore the data is cached in + * last-level caches shared between cpu cores and the gpu GT. Default on + * machines with HAS_LLC. + */ #define I915_CACHING_CACHED 1 +/** + * I915_CACHING_DISPLAY + * + * Special GPU caching mode which is coherent with the scanout engines. + * Transparently falls back to I915_CACHING_NONE on platforms where no special + * cache mode (like write-through or gfdt flushing) is available. The kernel + * automatically sets this mode when using a buffer as a scanout target. + * Userspace can manually set this mode to avoid a costly stall and clflush in + * the hotpath of drawing the first frame. + */ +#define I915_CACHING_DISPLAY 2 struct drm_i915_gem_caching { /**