From c76ce038e31a2b30bc3dd816f0aefaf685097a0a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 8 Aug 2013 14:41:03 +0100 Subject: [PATCH 01/62] drm/i915: Update rules for reading cache lines through the LLC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LLC is a fun device. The cache is a distinct functional block within the SA that arbitrates access from both the CPU and GPU cores. As such all writes to memory land first in the LLC before further action is taken. For example, an uncached write from either the CPU or GPU will then proceed to memory and evict the cacheline from the LLC. This means that a read from the LLC always returns the correct information even if the PTE bit in the GPU differs from the PAT bit in the CPU. For the older snooping architecture on non-LLC, the fundamental principle still holds except that some coordination is required between the CPU and GPU to explicitly perform the snooping (which is handled by our request tracking). The upshot of this is that we know that we can issue a read from either LLC devices or snoopable memory and trust the contents of the cache - i.e. we can forgo a clflush before a read in these circumstances. Writing to memory from the CPU is a little more tricky as we have to consider that the scanout does not read from the CPU cache at all, but from main memory. So we have to currently treat all requests to write to uncached memory as having to be flushed to main memory for coherency with all consumers. Signed-off-by: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 79cef3c9b1ad..2a1c87141693 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -61,6 +61,12 @@ static long i915_gem_purge(struct drm_i915_private *dev_priv, long target); static void i915_gem_shrink_all(struct drm_i915_private *dev_priv); static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); +static bool cpu_cache_is_coherent(struct drm_device *dev, + enum i915_cache_level level) +{ + return HAS_LLC(dev) || level != I915_CACHE_NONE; +} + static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) { if (obj->tiling_mode) @@ -420,8 +426,7 @@ i915_gem_shmem_pread(struct drm_device *dev, * read domain and manually flush cachelines (if required). This * optimizes for the case when the gpu will dirty the data * anyway again before the next pread happens. */ - if (obj->cache_level == I915_CACHE_NONE) - needs_clflush = 1; + needs_clflush = !cpu_cache_is_coherent(dev, obj->cache_level); if (i915_gem_obj_bound_any(obj)) { ret = i915_gem_object_set_to_gtt_domain(obj, false); if (ret) @@ -745,11 +750,11 @@ i915_gem_shmem_pwrite(struct drm_device *dev, return ret; } } - /* Same trick applies for invalidate partially written cachelines before - * writing. */ - if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU) - && obj->cache_level == I915_CACHE_NONE) - needs_clflush_before = 1; + /* Same trick applies to invalidate partially written cachelines read + * before writing. 
*/ + if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) + needs_clflush_before = + !cpu_cache_is_coherent(dev, obj->cache_level); ret = i915_gem_object_get_pages(obj); if (ret) @@ -3597,7 +3602,8 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) /* Flush the CPU cache if it's still invalid. */ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { - i915_gem_clflush_object(obj); + if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + i915_gem_clflush_object(obj); obj->base.read_domains |= I915_GEM_DOMAIN_CPU; } From cc98b413c197c4c6a62b1e469e9d05e613571af5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 9 Aug 2013 12:25:09 +0100 Subject: [PATCH 02/62] drm/i915: Track when an object is pinned for use by the display engine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The display engine has unique coherency rules such that it requires special handling to ensure that all writes to cursors, scanouts and sprites are clflushed. This patch introduces the infrastructure to simply track when an object is being accessed by the display engine. v2: Explain the is_pin_display() magic as the sources for obj->pin_count and their individual rules is not obvious. (Ville) Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 2 ++ drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 36 ++++++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_display.c | 8 +++---- 4 files changed, 42 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1a87cc9fd899..eb87865c20d4 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -117,6 +117,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_printf(m, " (name: %d)", obj->base.name); if (obj->pin_count) seq_printf(m, " (pinned x %d)", obj->pin_count); + if (obj->pin_display) + seq_printf(m, " (display)"); if (obj->fence_reg != I915_FENCE_REG_NONE) seq_printf(m, " (fence: %d)", obj->fence_reg); list_for_each_entry(vma, &obj->vma_list, vma_link) { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6abf8f9d9b14..b5df2308f8e6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1377,6 +1377,7 @@ struct drm_i915_gem_object { */ unsigned int fault_mappable:1; unsigned int pin_mappable:1; + unsigned int pin_display:1; /* * Is the GPU currently using a fence to access this buffer, @@ -1867,6 +1868,7 @@ int __must_check i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 alignment, struct intel_ring_buffer *pipelined); +void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj); int i915_gem_attach_phys_object(struct drm_device *dev, struct drm_i915_gem_object *obj, int id, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2a1c87141693..b98c3b0e5a02 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3501,6 +3501,22 @@ unlock: return ret; } +static bool is_pin_display(struct drm_i915_gem_object *obj) +{ + /* There are 3 sources that pin objects: + * 1. The display engine (scanouts, sprites, cursors); + * 2. Reservations for execbuffer; + * 3. The user. + * + * We can ignore reservations as we hold the struct_mutex and + * are only called outside of the reservation path. 
The user + * can only increment pin_count once, and so if after + * subtracting the potential reference by the user, any pin_count + * remains, it must be due to another use by the display engine. + */ + return obj->pin_count - !!obj->user_pin_count; +} + /* * Prepare buffer for display plane (scanout, cursors, etc). * Can be called from an uninterruptible phase (modesetting) and allows @@ -3520,6 +3536,11 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, return ret; } + /* Mark the pin_display early so that we account for the + * display coherency whilst setting up the cache domains. + */ + obj->pin_display = true; + /* The display engine is not coherent with the LLC cache on gen6. As * a result, we make sure that the pinning that is about to occur is * done with uncached PTEs. This is lowest common denominator for all @@ -3531,7 +3552,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, */ ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); if (ret) - return ret; + goto err_unpin_display; /* As the user may map the buffer once pinned in the display plane * (e.g. libkms for the bootup splash), we have to ensure that we @@ -3539,7 +3560,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, */ ret = i915_gem_obj_ggtt_pin(obj, alignment, true, false); if (ret) - return ret; + goto err_unpin_display; i915_gem_object_flush_cpu_write_domain(obj); @@ -3557,6 +3578,17 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, old_write_domain); return 0; + +err_unpin_display: + obj->pin_display = is_pin_display(obj); + return ret; +} + +void +i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj) +{ + i915_gem_object_unpin(obj); + obj->pin_display = is_pin_display(obj); } int diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 370c902fa629..bafdc3e21e87 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1877,7 +1877,7 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev, return 0; err_unpin: - i915_gem_object_unpin(obj); + i915_gem_object_unpin_from_display_plane(obj); err_interruptible: dev_priv->mm.interruptible = true; return ret; @@ -1886,7 +1886,7 @@ err_interruptible: void intel_unpin_fb_obj(struct drm_i915_gem_object *obj) { i915_gem_object_unpin_fence(obj); - i915_gem_object_unpin(obj); + i915_gem_object_unpin_from_display_plane(obj); } /* Computes the linear offset to the base tile and adjusts x, y. bytes per pixel @@ -6759,7 +6759,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, if (intel_crtc->cursor_bo != obj) i915_gem_detach_phys_object(dev, intel_crtc->cursor_bo); } else - i915_gem_object_unpin(intel_crtc->cursor_bo); + i915_gem_object_unpin_from_display_plane(intel_crtc->cursor_bo); drm_gem_object_unreference(&intel_crtc->cursor_bo->base); } @@ -6774,7 +6774,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, return 0; fail_unpin: - i915_gem_object_unpin(obj); + i915_gem_object_unpin_from_display_plane(obj); fail_locked: mutex_unlock(&dev->struct_mutex); fail: From 2c22569bba8af6c2976d5f9479fe54a53a39966b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 9 Aug 2013 12:26:45 +0100 Subject: [PATCH 03/62] drm/i915: Update rules for writing through the LLC with the cpu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As mentioned in the previous commit, reads and writes from both the CPU and GPU go through the LLC. 
This gives us coherency between the CPU and GPU irrespective of the attribute settings either device sets. We can use to avoid having to clflush even uncached memory. Except for the scanout. The scanout resides within another functional block that does not use the LLC but reads directly from main memory. So in order to maintain coherency with the scanout, writes to uncached memory must be flushed. In order to optimize writes elsewhere, we start tracking whether an framebuffer is attached to an object. v2: Use pin_display tracking rather than fb_count (to ensure we flush cursors as well etc) and only force the clflush along explicit writes to the scanout paths (i.e. pin_to_display_plane and pwrite into scanout). v3: Force the flush after hitting the slowpath in pwrite, as after dropping the lock the object's cache domain may be invalidated. (Ville) Based on a patch by Ville Syrjälä. Signed-off-by: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 58 ++++++++++++---------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- 4 files changed, 35 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b5df2308f8e6..34d3f2fae8ac 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1841,7 +1841,7 @@ static inline bool i915_terminally_wedged(struct i915_gpu_error *error) } void i915_gem_reset(struct drm_device *dev); -void i915_gem_clflush_object(struct drm_i915_gem_object *obj); +void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); int __must_check i915_gem_init(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b98c3b0e5a02..54d76e9392d8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -37,7 +37,8 @@ #include static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); -static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); +static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, + bool force); static __must_check int i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct i915_address_space *vm, @@ -67,6 +68,14 @@ static bool cpu_cache_is_coherent(struct drm_device *dev, return HAS_LLC(dev) || level != I915_CACHE_NONE; } +static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) +{ + if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + return true; + + return obj->pin_display; +} + static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) { if (obj->tiling_mode) @@ -742,8 +751,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, * write domain and manually flush cachelines (if required). This * optimizes for the case when the gpu will use the data * right away and we therefore have to clflush anyway. 
*/ - if (obj->cache_level == I915_CACHE_NONE) - needs_clflush_after = 1; + needs_clflush_after = cpu_write_needs_clflush(obj); if (i915_gem_obj_bound_any(obj)) { ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) @@ -833,7 +841,7 @@ out: */ if (!needs_clflush_after && obj->base.write_domain != I915_GEM_DOMAIN_CPU) { - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, obj->pin_display); i915_gem_chipset_flush(dev); } } @@ -911,9 +919,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, goto out; } - if (obj->cache_level == I915_CACHE_NONE && - obj->tiling_mode == I915_TILING_NONE && - obj->base.write_domain != I915_GEM_DOMAIN_CPU) { + if (obj->tiling_mode == I915_TILING_NONE && + obj->base.write_domain != I915_GEM_DOMAIN_CPU && + cpu_write_needs_clflush(obj)) { ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); /* Note that the gtt paths might fail with non-page-backed user * pointers (e.g. gtt mappings when moving data between @@ -1262,8 +1270,8 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, } /* Pinned buffers may be scanout, so flush the cache */ - if (obj->pin_count) - i915_gem_object_flush_cpu_write_domain(obj); + if (obj->pin_display) + i915_gem_object_flush_cpu_write_domain(obj, true); drm_gem_object_unreference(&obj->base); unlock: @@ -1640,7 +1648,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) * hope for the best. */ WARN_ON(ret != -EIO); - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, true); obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; } @@ -3217,7 +3225,8 @@ err_unpin: } void -i915_gem_clflush_object(struct drm_i915_gem_object *obj) +i915_gem_clflush_object(struct drm_i915_gem_object *obj, + bool force) { /* If we don't have a page list set up, then we're not pinned * to GPU, and we can ignore the cache flush because it'll happen @@ -3241,7 +3250,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj) * snooping behaviour occurs naturally as the result of our domain * tracking. */ - if (obj->cache_level != I915_CACHE_NONE) + if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) return; trace_i915_gem_object_clflush(obj); @@ -3278,14 +3287,15 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) /** Flushes the CPU write domain for the object if it's dirty. 
*/ static void -i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) +i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, + bool force) { uint32_t old_write_domain; if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) return; - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, force); i915_gem_chipset_flush(obj->base.dev); old_write_domain = obj->base.write_domain; obj->base.write_domain = 0; @@ -3319,7 +3329,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - i915_gem_object_flush_cpu_write_domain(obj); + i915_gem_object_flush_cpu_write_domain(obj, false); /* Serialise direct access to this object with the barriers for * coherent writes from the GPU, by effectively invalidating the @@ -3409,7 +3419,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, obj, cache_level); } - if (cache_level == I915_CACHE_NONE) { + list_for_each_entry(vma, &obj->vma_list, vma_link) + vma->node.color = cache_level; + obj->cache_level = cache_level; + + if (cpu_write_needs_clflush(obj)) { u32 old_read_domains, old_write_domain; /* If we're coming from LLC cached, then we haven't @@ -3432,9 +3446,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, old_write_domain); } - list_for_each_entry(vma, &obj->vma_list, vma_link) - vma->node.color = cache_level; - obj->cache_level = cache_level; i915_gem_verify_gtt(dev); return 0; } @@ -3562,7 +3573,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, if (ret) goto err_unpin_display; - i915_gem_object_flush_cpu_write_domain(obj); + i915_gem_object_flush_cpu_write_domain(obj, true); old_write_domain = obj->base.write_domain; old_read_domains = obj->base.read_domains; @@ -3634,8 +3645,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) /* Flush the CPU cache if it's still invalid. 
*/ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { - if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, false); obj->base.read_domains |= I915_GEM_DOMAIN_CPU; } @@ -3817,10 +3827,6 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data, obj->user_pin_count++; obj->pin_filp = file; - /* XXX - flush the CPU caches for pinned objects - * as the X server doesn't manage domains yet - */ - i915_gem_object_flush_cpu_write_domain(obj); args->offset = i915_gem_obj_ggtt_offset(obj); out: drm_gem_object_unreference(&obj->base); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 8ccc29ac9629..e999578a021c 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -716,7 +716,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, return ret; if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, false); flush_domains |= obj->base.write_domain; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 24fb989593f0..c9420c280cf0 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -487,7 +487,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) dev_priv->gtt.base.total / PAGE_SIZE); list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, obj->pin_display); i915_gem_gtt_bind_object(obj, obj->cache_level); } From d46f1c3f1372e3a72fab97c60480aa4a1084387f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 8 Aug 2013 14:41:06 +0100 Subject: [PATCH 04/62] drm/i915: Allow the GPU to cache stolen memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As a corollary to reviewing the interaction between LLC and our cache domains, the GPU PTE bits are independent of the CPU PAT bits. As such we can set the cache level on stolen memory based on how we wish the GPU to cache accesses to it. So we are free to set the same default cache levels as for normal bo, i.e. enable LLC cacheing by default where appropriate. Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_stolen.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index e68c4b5da46d..e20d64966c72 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -287,9 +287,8 @@ _i915_gem_object_create_stolen(struct drm_device *dev, i915_gem_object_pin_pages(obj); obj->stolen = stolen; - obj->base.write_domain = I915_GEM_DOMAIN_GTT; - obj->base.read_domains = I915_GEM_DOMAIN_GTT; - obj->cache_level = I915_CACHE_NONE; + obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; + obj->cache_level = HAS_LLC(dev) ? I915_CACHE_LLC : I915_CACHE_NONE; return obj; From 000433b67e46771a7c08f78574943855a98c53ec Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 8 Aug 2013 14:41:09 +0100 Subject: [PATCH 05/62] drm/i915: Only do a chipset flush after a clflush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we skip clflushes more often, return a boolean indicating whether the clflush was actually performed, and only if it was do the chipset flush. 
(Though on most of the architectures where the clflush will be skipped, the chipset flush is a no-op!) Signed-off-by: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 20 +++++++++++--------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 +++-- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 34d3f2fae8ac..1af59d72ddc7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1841,7 +1841,7 @@ static inline bool i915_terminally_wedged(struct i915_gpu_error *error) } void i915_gem_reset(struct drm_device *dev); -void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); +bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); int __must_check i915_gem_init(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 54d76e9392d8..959dffba0f0e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -841,8 +841,8 @@ out: */ if (!needs_clflush_after && obj->base.write_domain != I915_GEM_DOMAIN_CPU) { - i915_gem_clflush_object(obj, obj->pin_display); - i915_gem_chipset_flush(dev); + if (i915_gem_clflush_object(obj, obj->pin_display)) + i915_gem_chipset_flush(dev); } } @@ -3224,7 +3224,7 @@ err_unpin: return ret; } -void +bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force) { @@ -3233,14 +3233,14 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj, * again at bind time. */ if (obj->pages == NULL) - return; + return false; /* * Stolen memory is always coherent with the GPU as it is explicitly * marked as wc by the system, or the system is cache-coherent. */ if (obj->stolen) - return; + return false; /* If the GPU is snooping the contents of the CPU cache, * we do not need to manually clear the CPU cache lines. However, @@ -3251,11 +3251,12 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj, * tracking. */ if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) - return; + return false; trace_i915_gem_object_clflush(obj); - drm_clflush_sg(obj->pages); + + return true; } /** Flushes the GTT write domain for the object if it's dirty. 
*/ @@ -3295,8 +3296,9 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) return; - i915_gem_clflush_object(obj, force); - i915_gem_chipset_flush(obj->base.dev); + if (i915_gem_clflush_object(obj, force)) + i915_gem_chipset_flush(obj->base.dev); + old_write_domain = obj->base.write_domain; obj->base.write_domain = 0; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index e999578a021c..7dcf78cf6781 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -708,6 +708,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, { struct drm_i915_gem_object *obj; uint32_t flush_domains = 0; + bool flush_chipset = false; int ret; list_for_each_entry(obj, objects, exec_list) { @@ -716,12 +717,12 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, return ret; if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) - i915_gem_clflush_object(obj, false); + flush_chipset |= i915_gem_clflush_object(obj, false); flush_domains |= obj->base.write_domain; } - if (flush_domains & I915_GEM_DOMAIN_CPU) + if (flush_chipset) i915_gem_chipset_flush(ring->dev); if (flush_domains & I915_GEM_DOMAIN_GTT) From 35c7ab421a13f8327e3fd627c6ebafb1c13b2e55 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sat, 10 Aug 2013 14:51:11 +0200 Subject: [PATCH 06/62] drm/i915: reserve I915_CACHING_DISPLAY and document cache modes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolve the catch-22 of igt needing a stable number and patches first needing testcases by reserving the interface number up-front. v2: Improve the spelling a bit. v3: More spelling fail spotted by Chris. Requested-by: Chris Wilson Cc: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- include/uapi/drm/i915_drm.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index a1a7b6bd60d8..0bb3e5524382 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -768,8 +768,32 @@ struct drm_i915_gem_busy { __u32 busy; }; +/** + * I915_CACHING_NONE + * + * GPU access is not coherent with cpu caches. Default for machines without an + * LLC. + */ #define I915_CACHING_NONE 0 +/** + * I915_CACHING_CACHED + * + * GPU access is coherent with cpu caches and furthermore the data is cached in + * last-level caches shared between cpu cores and the gpu GT. Default on + * machines with HAS_LLC. + */ #define I915_CACHING_CACHED 1 +/** + * I915_CACHING_DISPLAY + * + * Special GPU caching mode which is coherent with the scanout engines. + * Transparently falls back to I915_CACHING_NONE on platforms where no special + * cache mode (like write-through or gfdt flushing) is available. The kernel + * automatically sets this mode when using a buffer as a scanout target. + * Userspace can manually set this mode to avoid a costly stall and clflush in + * the hotpath of drawing the first frame. + */ +#define I915_CACHING_DISPLAY 2 struct drm_i915_gem_caching { /** From 7ace7ef2f5d20632240196fa3e5d5c74cf2c1508 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 9 Aug 2013 22:12:12 -0700 Subject: [PATCH 07/62] drm/i915: WARN_ON failed map_and_fenceable I just noticed in our code we don't really check the assertion, and given some of the code I am changing in this area, I feel a WARN is very nice to have. 
Signed-off-by: Ben Widawsky [danvet: s/&/&&/ to fix typo on the check.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 959dffba0f0e..e414adaaf11d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3211,6 +3211,8 @@ search_free: if (i915_is_ggtt(vm)) obj->map_and_fenceable = mappable && fenceable; + WARN_ON(map_and_fenceable && !obj->map_and_fenceable); + trace_i915_vma_bind(vma, map_and_fenceable); i915_gem_verify_gtt(dev); return 0; From 1ad87e72b54cf3b698c002f0a31ac34d6b407f0b Mon Sep 17 00:00:00 2001 From: Guillaume Clement Date: Sat, 10 Aug 2013 21:57:57 +0200 Subject: [PATCH 08/62] i915: Fix SDVO potentially turning off randomly Some Poulsbo cards seem to incorrectly report SDVO_CMD_STATUS_TARGET_NOT_SPECIFIED instead of SDVO_CMD_STATUS_PENDING, which causes the display to be turned off. This could also happen to i915. Signed-off-by: Guillaume Clement Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_sdvo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 02f220b4e4a1..317e058fb3cf 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -538,7 +538,8 @@ static bool intel_sdvo_read_response(struct intel_sdvo *intel_sdvo, &status)) goto log_fail; - while (status == SDVO_CMD_STATUS_PENDING && --retry) { + while ((status == SDVO_CMD_STATUS_PENDING || + status == SDVO_CMD_STATUS_TARGET_NOT_SPECIFIED) && --retry) { if (retry < 10) msleep(15); else From c8b5018b22bc03941fbc6dcf7bea5e8344a8f3da Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Sun, 11 Aug 2013 12:44:26 +0300 Subject: [PATCH 09/62] drm/i915: remove unused leftover variable irq_received It's been there since i8xx_irq_handler() was added in commit c2798b19bac2538393fc932bfbe59807a4734b3e Author: Chris Wilson Date: Sun Apr 22 21:13:57 2012 +0100 drm/i915: i8xx interrupt handler Signed-off-by: Jani Nikula Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 8a77faf4927d..34467ed4f9da 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2403,7 +2403,6 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) u16 iir, new_iir; u32 pipe_stats[2]; unsigned long irqflags; - int irq_received; int pipe; u16 flip_mask = I915_DISPLAY_PLANE_A_FLIP_PENDING_INTERRUPT | @@ -2437,7 +2436,6 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) DRM_DEBUG_DRIVER("pipe %c underrun\n", pipe_name(pipe)); I915_WRITE(reg, pipe_stats[pipe]); - irq_received = 1; } } spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); From f2f4d82faf85d2e53a2ba00a831a9f7f80b7e6e7 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Sun, 11 Aug 2013 12:44:01 +0300 Subject: [PATCH 10/62] drm/i915: give more distinctive names to ring hangcheck action enums The short lowercase names are bound to collide. The default warnings don't even warn about shadowing. 
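An illustrative aside, not taken from the patch: the hazard the rename guards against. Enumerators live in the ordinary identifier namespace, so a local variable named like one of the old short actions quietly hides it, and GCC stays silent because -Wshadow is not in the default (or -Wall) warning set. A minimal standalone sketch with made-up names:

	/* Hypothetical example only -- not driver code. */
	enum hangcheck_action { wait, active, kick, hung };

	static int is_active(enum hangcheck_action action)
	{
		int active = 0;	/* silently shadows the enumerator "active" */

		/* This now compares against the local (0, i.e. wait), not the
		 * enumerator -- the kind of collision the HANGCHECK_* prefixes
		 * rule out. */
		return action == active;
	}
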
Signed-off-by: Jani Nikula Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_irq.c | 22 +++++++++++----------- drivers/gpu/drm/i915/intel_ringbuffer.h | 7 ++++++- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e414adaaf11d..474748ffa7b6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2214,7 +2214,7 @@ static void i915_set_reset_status(struct intel_ring_buffer *ring, offset = i915_gem_obj_offset(request->batch_obj, request_to_vm(request)); - if (ring->hangcheck.action != wait && + if (ring->hangcheck.action != HANGCHECK_WAIT && i915_request_guilty(request, acthd, &inside)) { DRM_ERROR("%s hung %s bo (0x%lx ctx %d) at 0x%x\n", ring->name, diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 34467ed4f9da..06659a72d0cf 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1826,10 +1826,10 @@ ring_stuck(struct intel_ring_buffer *ring, u32 acthd) u32 tmp; if (ring->hangcheck.acthd != acthd) - return active; + return HANGCHECK_ACTIVE; if (IS_GEN2(dev)) - return hung; + return HANGCHECK_HUNG; /* Is the chip hanging on a WAIT_FOR_EVENT? * If so we can simply poke the RB_WAIT bit @@ -1841,24 +1841,24 @@ ring_stuck(struct intel_ring_buffer *ring, u32 acthd) DRM_ERROR("Kicking stuck wait on %s\n", ring->name); I915_WRITE_CTL(ring, tmp); - return kick; + return HANGCHECK_KICK; } if (INTEL_INFO(dev)->gen >= 6 && tmp & RING_WAIT_SEMAPHORE) { switch (semaphore_passed(ring)) { default: - return hung; + return HANGCHECK_HUNG; case 1: DRM_ERROR("Kicking stuck semaphore on %s\n", ring->name); I915_WRITE_CTL(ring, tmp); - return kick; + return HANGCHECK_KICK; case 0: - return wait; + return HANGCHECK_WAIT; } } - return hung; + return HANGCHECK_HUNG; } /** @@ -1926,16 +1926,16 @@ static void i915_hangcheck_elapsed(unsigned long data) acthd); switch (ring->hangcheck.action) { - case wait: + case HANGCHECK_WAIT: score = 0; break; - case active: + case HANGCHECK_ACTIVE: score = BUSY; break; - case kick: + case HANGCHECK_KICK: score = KICK; break; - case hung: + case HANGCHECK_HUNG: score = HUNG; stuck[i] = true; break; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 6e38256d41e1..5e6be842d225 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -37,7 +37,12 @@ struct intel_hw_status_page { #define I915_READ_SYNC_0(ring) I915_READ(RING_SYNC_0((ring)->mmio_base)) #define I915_READ_SYNC_1(ring) I915_READ(RING_SYNC_1((ring)->mmio_base)) -enum intel_ring_hangcheck_action { wait, active, kick, hung }; +enum intel_ring_hangcheck_action { + HANGCHECK_WAIT, + HANGCHECK_ACTIVE, + HANGCHECK_KICK, + HANGCHECK_HUNG, +}; struct intel_ring_hangcheck { bool deadlock; From ea04cb31d506ac3f4fc3cefb1c50eb4f35ab37fd Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Sun, 11 Aug 2013 12:44:02 +0300 Subject: [PATCH 11/62] drm/i915: drop unnecessary local variable to suppress build warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although I could not reproduce this (different compiler version, perhaps), reportedly we get: drivers/gpu/drm/i915/i915_irq.c:1943:27: warning: ‘score’ may be used uninitialized in this function [-Wuninitialized] Drop the 'score' variable altogether as it's not really needed. 
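An illustrative aside, not taken from the patch: the shape of code that tends to provoke this warning. When a local is assigned only inside a switch, GCC's -Wmaybe-uninitialized analysis cannot always prove that every path writes it before the later read, even when the cases happen to cover every enumerator; adding to the accumulator directly in each case, as done here, drops the intermediate local altogether. A minimal standalone sketch with made-up names and values:

	/* Hypothetical reduction of the warning -- not driver code. */
	enum action { A_WAIT, A_ACTIVE, A_KICK, A_HUNG };

	static int add_score(enum action a, int total)
	{
		int score;	/* may be flagged as possibly uninitialized */

		switch (a) {
		case A_WAIT:
			score = 0;
			break;
		case A_ACTIVE:
			score = 10;
			break;
		case A_KICK:
			score = 20;
			break;
		case A_HUNG:
			score = 30;
			break;
		}
		return total + score;
	}
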
Reported-by: Kees Cook Signed-off-by: Jani Nikula Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 06659a72d0cf..28d57477aa42 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1905,8 +1905,6 @@ static void i915_hangcheck_elapsed(unsigned long data) } else busy = false; } else { - int score; - /* We always increment the hangcheck score * if the ring is busy and still processing * the same request, so that no single request @@ -1927,20 +1925,18 @@ static void i915_hangcheck_elapsed(unsigned long data) switch (ring->hangcheck.action) { case HANGCHECK_WAIT: - score = 0; break; case HANGCHECK_ACTIVE: - score = BUSY; + ring->hangcheck.score += BUSY; break; case HANGCHECK_KICK: - score = KICK; + ring->hangcheck.score += KICK; break; case HANGCHECK_HUNG: - score = HUNG; + ring->hangcheck.score += HUNG; stuck[i] = true; break; } - ring->hangcheck.score += score; } } else { /* Gradually reduce the count so that we catch DoS From 651d794fae9b79237aae1c97f8a9d9f3817bd31d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 8 Aug 2013 14:41:10 +0100 Subject: [PATCH 12/62] drm/i915: Use Write-Through cacheing for the display plane on Iris MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Haswell GT3e has the unique feature of supporting Write-Through cacheing of objects within the eLLC/LLC. The purpose of this is to enable the display plane to remain coherent whilst objects lie resident in the eLLC/LLC - so that we, in theory, get the best of both worlds, perfect display and fast access. However, we still need to be careful as the CPU does not see the WT when accessing the cache. In particular, this means that we need to flush the cache lines after writing to an object through the CPU, and on transitioning from a cached state to WT. v2: Actually do the clflush on transition to WT, nagging by Ville. v3: Flush the CPU cache after writes into WT objects. v4: Rease onto LLC updates and report WT as "uncached" for get_cache_level_ioctl to remain symmetric with set_cache_level_ioctl. Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Kenneth Graunke Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 3 +++ drivers/gpu/drm/i915/i915_drv.h | 4 +++- drivers/gpu/drm/i915/i915_gem.c | 14 ++++++++++++-- drivers/gpu/drm/i915/i915_gem_gtt.c | 11 ++++++++++- include/uapi/drm/i915_drm.h | 1 + 5 files changed, 29 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index ce098c3ccc00..f4231185ec7d 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -976,6 +976,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_LLC: value = HAS_LLC(dev); break; + case I915_PARAM_HAS_WT: + value = HAS_WT(dev); + break; case I915_PARAM_HAS_ALIASING_PPGTT: value = dev_priv->mm.aliasing_ppgtt ? 1 : 0; break; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1af59d72ddc7..6d07467d0e7e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -454,6 +454,7 @@ enum i915_cache_level { caches, eg sampler/render caches, and the large Last-Level-Cache. LLC is coherent with the CPU, but L3 is only visible to the GPU. 
*/ + I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */ }; typedef uint32_t gen6_gtt_pte_t; @@ -1385,7 +1386,7 @@ struct drm_i915_gem_object { unsigned int pending_fenced_gpu_access:1; unsigned int fenced_gpu_access:1; - unsigned int cache_level:2; + unsigned int cache_level:3; unsigned int has_aliasing_ppgtt_mapping:1; unsigned int has_global_gtt_mapping:1; @@ -1530,6 +1531,7 @@ struct drm_i915_file_private { #define HAS_BLT(dev) (INTEL_INFO(dev)->has_blt_ring) #define HAS_VEBOX(dev) (INTEL_INFO(dev)->has_vebox_ring) #define HAS_LLC(dev) (INTEL_INFO(dev)->has_llc) +#define HAS_WT(dev) (IS_HASWELL(dev) && to_i915(dev)->ellc_size) #define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws) #define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 6) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 474748ffa7b6..4064fdf15abe 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3471,7 +3471,16 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, goto unlock; } - args->caching = obj->cache_level != I915_CACHE_NONE; + switch (obj->cache_level) { + case I915_CACHE_LLC: + case I915_CACHE_L3_LLC: + args->caching = I915_CACHING_CACHED; + break; + + default: + args->caching = I915_CACHING_NONE; + break; + } drm_gem_object_unreference(&obj->base); unlock: @@ -3565,7 +3574,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, * of uncaching, which would allow us to flush all the LLC-cached data * with that bit in the PTE to main memory with just one PIPE_CONTROL. */ - ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); + ret = i915_gem_object_set_cache_level(obj, + HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); if (ret) goto err_unpin_display; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index c9420c280cf0..212f6d8c35ec 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -55,6 +55,7 @@ #define HSW_WB_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x2) #define HSW_WB_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x3) #define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb) +#define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr, enum i915_cache_level level) @@ -138,8 +139,16 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, gen6_gtt_pte_t pte = GEN6_PTE_VALID; pte |= HSW_PTE_ADDR_ENCODE(addr); - if (level != I915_CACHE_NONE) + switch (level) { + case I915_CACHE_NONE: + break; + case I915_CACHE_WT: + pte |= HSW_WT_ELLC_LLC_AGE0; + break; + default: pte |= HSW_WB_ELLC_LLC_AGE0; + break; + } return pte; } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 0bb3e5524382..55bb5729bd78 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -334,6 +334,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_PINNED_BATCHES 24 #define I915_PARAM_HAS_EXEC_NO_RELOC 25 #define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 +#define I915_PARAM_HAS_WT 27 typedef struct drm_i915_getparam { int param; From 4257d3ba3b87a84adb2f840620cb63512f0bab22 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 8 Aug 2013 14:41:11 +0100 Subject: [PATCH 13/62] drm/i915: Allow the user to set bo into the DISPLAY cache domain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is primarily for the benefit of the create2 ioctl so that the caller can avoid the later step of rebinding the bo with 
new PTE bits. After introducing WT (and possibly GFDT) cacheing for display targets, not everything in the display is earmarked as UC, and more importantly what is is controlled by the kernel. Note that set_cache_level/get_cache_level for DISPLAY is not necessarily idempotent; get_cache_level may return UC for architectures that have no special cache domain for the display engine. Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4064fdf15abe..5f48ecc77ec2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3477,6 +3477,10 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, args->caching = I915_CACHING_CACHED; break; + case I915_CACHE_WT: + args->caching = I915_CACHING_DISPLAY; + break; + default: args->caching = I915_CACHING_NONE; break; @@ -3503,6 +3507,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, case I915_CACHING_CACHED: level = I915_CACHE_LLC; break; + case I915_CACHING_DISPLAY: + level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; + break; default: return -EINVAL; } From f3f08572fc245bc0cf5f102473ce0f54e693831d Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Mon, 12 Aug 2013 14:56:53 -0300 Subject: [PATCH 14/62] drm/i915: remove set but unused variables Caught by "make W=1 drivers/gpu/drm/i915/". Signed-off-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 12 ++---------- drivers/gpu/drm/i915/intel_dp.c | 3 --- drivers/gpu/drm/i915/intel_hdmi.c | 2 -- 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index bafdc3e21e87..d2dec3ff308a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -690,7 +690,7 @@ vlv_find_best_dpll(const intel_limit_t *limit, struct drm_crtc *crtc, { u32 p1, p2, m1, m2, vco, bestn, bestm1, bestm2, bestp1, bestp2; u32 m, n, fastclk; - u32 updrate, minupdate, fracbits, p; + u32 updrate, minupdate, p; unsigned long bestppm, ppm, absppm; int dotclk, flag; @@ -701,7 +701,6 @@ vlv_find_best_dpll(const intel_limit_t *limit, struct drm_crtc *crtc, fastclk = dotclk / (2*100); updrate = 0; minupdate = 19200; - fracbits = 1; n = p = p1 = p2 = m = m1 = m2 = vco = bestn = 0; bestm1 = bestm2 = bestp1 = bestp2 = 0; @@ -4423,13 +4422,10 @@ static void vlv_update_pll(struct intel_crtc *crtc) int pipe = crtc->pipe; u32 dpll, mdiv; u32 bestn, bestm1, bestm2, bestp1, bestp2; - bool is_hdmi; u32 coreclk, reg_val, dpll_md; mutex_lock(&dev_priv->dpio_lock); - is_hdmi = intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_HDMI); - bestn = crtc->config.dpll.n; bestm1 = crtc->config.dpll.m1; bestm2 = crtc->config.dpll.m2; @@ -8894,14 +8890,13 @@ intel_modeset_stage_output_state(struct drm_device *dev, struct drm_crtc *new_crtc; struct intel_connector *connector; struct intel_encoder *encoder; - int count, ro; + int ro; /* The upper layers ensure that we either disable a crtc or have a list * of connectors. For paranoia, double-check this. 
*/ WARN_ON(!set->fb && (set->num_connectors != 0)); WARN_ON(set->fb && (set->num_connectors == 0)); - count = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, base.head) { /* Otherwise traverse passed in connector list and get encoders @@ -8935,7 +8930,6 @@ intel_modeset_stage_output_state(struct drm_device *dev, /* connector->new_encoder is now updated for all connectors. */ /* Update crtc of enabled connectors. */ - count = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, base.head) { if (!connector->new_encoder) @@ -10295,7 +10289,6 @@ void intel_modeset_cleanup(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_crtc *crtc; - struct intel_crtc *intel_crtc; /* * Interrupts and polling as the first thing to avoid creating havoc. @@ -10319,7 +10312,6 @@ void intel_modeset_cleanup(struct drm_device *dev) if (!crtc->fb) continue; - intel_crtc = to_intel_crtc(crtc); intel_increase_pllclock(crtc); } diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 63b6722d4285..2726d4d41722 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2326,7 +2326,6 @@ intel_dp_start_link_train(struct intel_dp *intel_dp) struct drm_device *dev = encoder->dev; int i; uint8_t voltage; - bool clock_recovery = false; int voltage_tries, loop_tries; uint32_t DP = intel_dp->DP; @@ -2344,7 +2343,6 @@ intel_dp_start_link_train(struct intel_dp *intel_dp) voltage = 0xff; voltage_tries = 0; loop_tries = 0; - clock_recovery = false; for (;;) { /* Use intel_dp->train_set[0] to set the voltage and pre emphasis values */ uint8_t link_status[DP_LINK_STATUS_SIZE]; @@ -2365,7 +2363,6 @@ intel_dp_start_link_train(struct intel_dp *intel_dp) if (drm_dp_clock_recovery_ok(link_status, intel_dp->lane_count)) { DRM_DEBUG_KMS("clock recovery OK\n"); - clock_recovery = true; break; } diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 88562913fb7f..94179fdf61f5 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1232,7 +1232,6 @@ void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port) { struct intel_digital_port *intel_dig_port; struct intel_encoder *intel_encoder; - struct drm_encoder *encoder; struct intel_connector *intel_connector; intel_dig_port = kzalloc(sizeof(struct intel_digital_port), GFP_KERNEL); @@ -1246,7 +1245,6 @@ void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port) } intel_encoder = &intel_dig_port->base; - encoder = &intel_encoder->base; drm_encoder_init(dev, &intel_encoder->base, &intel_hdmi_enc_funcs, DRM_MODE_ENCODER_TMDS); From a1d95703b7fa5cbc4abf53f63df51c49cbacc7b6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 13 Aug 2013 18:48:47 +0100 Subject: [PATCH 15/62] drm/i915: Print the changes required for modeset After computing the stage changes for the set_config, record those in the debug log. 
Signed-off-by: Chris Wilson Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index d2dec3ff308a..a87bb93a0f63 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8880,6 +8880,9 @@ intel_set_config_compute_mode_changes(struct drm_mode_set *set, drm_mode_debug_printmodeline(set->mode); config->mode_changed = true; } + + DRM_DEBUG_KMS("computed changes for [CRTC:%d], mode_changed=%d, fb_changed=%d\n", + set->crtc->base.id, config->mode_changed, config->fb_changed); } static int From ed1c9e2cf414e32cb7ea1217b51b39e70fc132d2 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Mon, 12 Aug 2013 14:34:08 -0300 Subject: [PATCH 16/62] drm/i915: print a message when we detect an early Haswell SDV The machines that fall in this category are the SDVs that have a PCI ID starting with 0x0C. These are very early pre-production machines and may not fully work. Other Haswell SDVs have PCI IDs that match the real Haswell machines and we expect them to work better. Even though they have problems, they still mostly work so I don't see a reason to refuse loading our driver. But I do see a reason to reject bug reports from these machines, so the message should help the bug triagers. As far as I know, we don't implement some workarounds that are specific to these machines and suspend/resume may not work on most of them, but besides this, they may work. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=61508 Signed-off-by: Paulo Zanoni Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 8 ++++++++ drivers/gpu/drm/i915/i915_drv.h | 2 ++ 2 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index f4231185ec7d..d4c176b7d76e 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1488,6 +1488,14 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) i915_dump_device_info(dev_priv); + /* Not all pre-production machines fall into this category, only the + * very first ones. Almost everything should work, except for maybe + * suspend/resume. And we don't implement workarounds that affect only + * pre-production machines. */ + if (IS_HSW_EARLY_SDV(dev)) + DRM_INFO("This is an early pre-production Haswell machine. 
" + "It may not be fully functional.\n"); + if (i915_get_bridge_dev(dev)) { ret = -EIO; goto free_priv; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6d07467d0e7e..2e7d5f9524f7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1511,6 +1511,8 @@ struct drm_i915_file_private { #define IS_VALLEYVIEW(dev) (INTEL_INFO(dev)->is_valleyview) #define IS_HASWELL(dev) (INTEL_INFO(dev)->is_haswell) #define IS_MOBILE(dev) (INTEL_INFO(dev)->is_mobile) +#define IS_HSW_EARLY_SDV(dev) (IS_HASWELL(dev) && \ + ((dev)->pci_device & 0xFF00) == 0x0C00) #define IS_ULT(dev) (IS_HASWELL(dev) && \ ((dev)->pci_device & 0xFF00) == 0x0A00) From 351aa5666d02062b52329bcfe4bcf9d1f882fba9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Marchesin?= Date: Tue, 13 Aug 2013 11:55:17 -0700 Subject: [PATCH 17/62] drm/i915: tune the RC6 threshold for stability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's basically the same deal as the RC6+ issues on ivy bridge except this time with RC6 on sandy bridge. Like last time the core of the issue is that the timings don't work 100% with our voltage regulator. So from time to time, the kernel will print a warning message about the GPU not getting out of RC6. In particular, I found this fairly easy to reproduce during suspend/resume. Changing the threshold to 125000 instead of 50000 seems to fix the issue. The previous patch used 150000 but as it turns out this doesn't work everywhere. After getting such a machine, I bisected the highest value which works, which is 125000, so here it is. I also measured the idle power usage before/after this patch and didn't see a difference on a sandy bridge laptop. On haswell and up, it makes a big difference, so we want to keep it at 50k there. It also seems like haswell doesn't have the RC6 issues that sandy bridge has so the 50k value is fine. Signed-off-by: Stéphane Marchesin Acked-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3ac5fe9d428a..76150818b5cb 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3508,7 +3508,10 @@ static void gen6_enable_rps(struct drm_device *dev) I915_WRITE(GEN6_RC_SLEEP, 0); I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); - I915_WRITE(GEN6_RC6_THRESHOLD, 50000); + if (INTEL_INFO(dev)->gen <= 6 || IS_IVYBRIDGE(dev)) + I915_WRITE(GEN6_RC6_THRESHOLD, 125000); + else + I915_WRITE(GEN6_RC6_THRESHOLD, 50000); I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ From 5020150b3b8d2912466e28572f25b3cc56722aec Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Mon, 12 Aug 2013 16:53:03 -0700 Subject: [PATCH 18/62] drm/i915: Initialize seqno for VECS too We require n-1 mailboxes for proper semaphore synchronization. All semaphore synchronization code relies on proper values in these mailboxes. The fact that we failed to touch the vebox ring by itself was unlikely to be an issue since the HW should be initializing the values to 0. However the error framework for testing seqno wrap introduced by Mika, in addition to the hangcheck via seqno, and i915_error_first_batchbuffer() combined caused a nice explosion. The problem is caused by seqno wrap because the wrap condition is not properly setup. 
The wrap code attempts to set the sync mailboxes all to 0, and then set the current seqno to one less than 0. In all cases, the vebox mailbox wasn't properly being initialized. This caused a wrap to not occur. When hangcheck kicks in with the bogus seqno values, the rest just doesn't work. It makes me wonder if we shouldn't consider a dumber version of hangcheck... How we messed this up: VECS support was written before the aforementioned other features. Upon VECS being rebased, these facts were missed. Cc: Mika Kuoppala Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=65387 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=67198 Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 74d02a704515..34777168f700 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1594,6 +1594,8 @@ void intel_ring_init_seqno(struct intel_ring_buffer *ring, u32 seqno) if (INTEL_INFO(ring->dev)->gen >= 6) { I915_WRITE(RING_SYNC_0(ring->mmio_base), 0); I915_WRITE(RING_SYNC_1(ring->mmio_base), 0); + if (HAS_VEBOX(ring->dev)) + I915_WRITE(RING_SYNC_2(ring->mmio_base), 0); } ring->set_seqno(ring, seqno); From 4e5aabfd3106cfd68694db416e271996aadf114a Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Mon, 12 Aug 2013 16:53:04 -0700 Subject: [PATCH 19/62] drm/i915: Get VECS semaphore info on error Ideally we could use for_each_ring with the ring flags as I've done a couple times (http://lists.freedesktop.org/archives/intel-gfx/2013-June/029450.html). Until Daniel merges that patch though, we can just use this. Cc: Mika Kuoppala Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gpu_error.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 60393cb9a7c7..558e568d5b45 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -243,6 +243,11 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m, err_printf(m, " SYNC_1: 0x%08x [last synced 0x%08x]\n", error->semaphore_mboxes[ring][1], error->semaphore_seqno[ring][1]); + if (HAS_VEBOX(dev)) { + err_printf(m, " SYNC_2: 0x%08x [last synced 0x%08x]\n", + error->semaphore_mboxes[ring][2], + error->semaphore_seqno[ring][2]); + } } err_printf(m, " seqno: 0x%08x\n", error->seqno[ring]); err_printf(m, " waiting: %s\n", yesno(error->waiting[ring])); @@ -682,6 +687,12 @@ static void i915_record_ring_state(struct drm_device *dev, error->semaphore_seqno[ring->id][1] = ring->sync_seqno[1]; } + if (HAS_VEBOX(dev)) { + error->semaphore_mboxes[ring->id][2] = + I915_READ(RING_SYNC_2(ring->mmio_base)); + error->semaphore_seqno[ring->id][2] = ring->sync_seqno[2]; + } + if (INTEL_INFO(dev)->gen >= 4) { error->faddr[ring->id] = I915_READ(RING_DMA_FADD(ring->mmio_base)); error->ipeir[ring->id] = I915_READ(RING_IPEIR(ring->mmio_base)); From 4a025e26a2979193739f46e391ffc05cf0637d90 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 14 Aug 2013 10:01:32 +0200 Subject: [PATCH 20/62] drm/i915: clarify error paths in create_stolen_for_preallocated Use the standard inversely ordered goto label stack for everything. Spotted while reviewing place where we might need to to call vma_destroy but failed to do so. 
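An illustrative aside, not taken from the patch: the "standard inversely ordered goto label stack" the message refers to. Each acquisition gets its own unwind label, and the labels appear in reverse order of the acquisitions, so jumping to the label that matches the failed step releases exactly what has been set up so far and nothing more. A minimal standalone sketch; the helper names are made up for illustration:

	/* Hypothetical helpers, stubbed so the sketch stands alone. */
	static int acquire_a(void) { return 0; }
	static int acquire_b(void) { return 0; }
	static int acquire_c(void) { return 0; }
	static void release_a(void) { }
	static void release_b(void) { }

	static int setup_example(void)
	{
		int ret;

		ret = acquire_a();
		if (ret)
			goto err_out;

		ret = acquire_b();
		if (ret)
			goto err_release_a;

		ret = acquire_c();
		if (ret)
			goto err_release_b;

		return 0;

	err_release_b:
		release_b();
	err_release_a:
		release_a();
	err_out:
		return ret;
	}
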
Cc: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_stolen.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index e20d64966c72..7f4c510a751b 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -392,8 +392,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, ret = drm_mm_reserve_node(&ggtt->mm, &vma->node); if (ret) { DRM_DEBUG_KMS("failed to allocate stolen GTT space\n"); - i915_gem_vma_destroy(vma); - goto err_out; + goto err_vma; } } @@ -404,6 +403,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, return obj; +err_vma: + i915_gem_vma_destroy(vma); err_out: drm_mm_put_block(stolen); drm_gem_object_unreference(&obj->base); From 433544bd25b06cb6dcdb79b6da8d748a0220898e Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 13 Aug 2013 18:09:06 -0700 Subject: [PATCH 21/62] drm/i915: Remove node only when allocated VMAs can be created and not bound. One may think of it as lazy cleanup, and safely gloss over the conditions which manufacture it. In either case, when the object backing the i915 vma is destroyed, we must cleanup the vma without stumbling into a bunch of pitfalls that assume the vma is bound. NOTE: I was pretty certain the above condition could only happen when we introduced the use of VMAs being looked up at execbuf, and already existing. Paulo has hit this though, so I must be missing something. As I believe the patch is correct anyway, therefore I won't scratch my head too hard. v2: use goto destroy as a compromise (Chris) Cc: Chris Wilson Cc: Paulo Zanoni Signed-off-by: Ben Widawsky Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5f48ecc77ec2..910fbaff0e99 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2619,6 +2619,9 @@ int i915_vma_unbind(struct i915_vma *vma) if (list_empty(&vma->vma_link)) return 0; + if (!drm_mm_node_allocated(&vma->node)) + goto destroy; + if (obj->pin_count) return -EBUSY; @@ -2656,6 +2659,8 @@ int i915_vma_unbind(struct i915_vma *vma) obj->map_and_fenceable = true; drm_mm_remove_node(&vma->node); + +destroy: i915_gem_vma_destroy(vma); /* Since the unbound list is global, only move to that list if From 4bd561b3e8d7d2407cf465cb79c51a1ff1264343 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 13 Aug 2013 18:09:07 -0700 Subject: [PATCH 22/62] drm/i915: cleanup map&fence in bind Cleanup the map and fenceable setting during bind to make more sense, and not check i915_is_ggtt() 2 unnecessary times v2: Move the bools into the if block (Chris) - There are ways to tidy this function (fence calculations for instance) even further, but they are quite invasive, so I am punting on those unless specifically asked. 
v3: Add newline between variable declaration and logic (Chris) Recommended-by: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 910fbaff0e99..6cb4467005c0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3119,7 +3119,6 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct drm_device *dev = obj->base.dev; drm_i915_private_t *dev_priv = dev->dev_private; u32 size, fence_size, fence_alignment, unfenced_alignment; - bool mappable, fenceable; size_t gtt_max = map_and_fenceable ? dev_priv->gtt.mappable_end : vm->total; struct i915_vma *vma; @@ -3203,18 +3202,18 @@ search_free: list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); list_add_tail(&vma->mm_list, &vm->inactive_list); - fenceable = - i915_is_ggtt(vm) && - i915_gem_obj_ggtt_size(obj) == fence_size && - (i915_gem_obj_ggtt_offset(obj) & (fence_alignment - 1)) == 0; + if (i915_is_ggtt(vm)) { + bool mappable, fenceable; - mappable = - i915_is_ggtt(vm) && - vma->node.start + obj->base.size <= dev_priv->gtt.mappable_end; + fenceable = + i915_gem_obj_ggtt_size(obj) == fence_size && + (i915_gem_obj_ggtt_offset(obj) & (fence_alignment - 1)) == 0; + + mappable = + vma->node.start + obj->base.size <= dev_priv->gtt.mappable_end; - /* Map and fenceable only changes if the VM is the global GGTT */ - if (i915_is_ggtt(vm)) obj->map_and_fenceable = mappable && fenceable; + } WARN_ON(map_and_fenceable && !obj->map_and_fenceable); From 49987099e2cfce4eda5d428e2618fd4e93aba597 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 14 Aug 2013 10:21:23 +0200 Subject: [PATCH 23/62] drm/i915: use vma->node directly and rewrap map&fence in bind Use () to make for neater alignment of the split lines, too. With this we ditch another jump through the obj_gtt_size/offset indirection maze. Cc: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6cb4467005c0..fd4872497a72 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3205,12 +3205,11 @@ search_free: if (i915_is_ggtt(vm)) { bool mappable, fenceable; - fenceable = - i915_gem_obj_ggtt_size(obj) == fence_size && - (i915_gem_obj_ggtt_offset(obj) & (fence_alignment - 1)) == 0; + fenceable = (vma->node.size == fence_size && + (vma->node.start & (fence_alignment - 1)) == 0); - mappable = - vma->node.start + obj->base.size <= dev_priv->gtt.mappable_end; + mappable = (vma->node.start + obj->base.size <= + dev_priv->gtt.mappable_end); obj->map_and_fenceable = mappable && fenceable; } From 3ef80a818bce56fb4a7ed4465a8fc8372085b9a4 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 13 Aug 2013 18:09:08 -0700 Subject: [PATCH 24/62] drm: WARN when removing unallocated node The conditional is usually a recoverable driver bug, and so WARNing, and preventing the drm_mm code from doing potential damage (BUG) is desirable. This issue was hit and fixed twice while developing the i915 multiple address space code. The first fix is the patch just before this, and is hit on an not frequently occuring error path. 
Another was fixed during patch iteration, so it's hard to see from the patch: commit c6cfb325677ea6305fb19acf3a4d14ea267f923e Author: Ben Widawsky Date: Fri Jul 5 14:41:06 2013 -0700 drm/i915: Embed drm_mm_node in i915 gem obj From the intel-gfx mailing list, we discussed this: References: <20130705191235.GA3057@bwidawsk.net> Cc: Dave Airlie CC: Acked-by: Chris Wilson Signed-off-by: Ben Widawsky Acked-by: Dave Airlie Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_mm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index fe304f903b13..feb267f37e21 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -346,6 +346,9 @@ void drm_mm_remove_node(struct drm_mm_node *node) struct drm_mm *mm = node->mm; struct drm_mm_node *prev_node; + if (WARN_ON(!node->allocated)) + return; + BUG_ON(node->scanned_block || node->scanned_prev_free || node->scanned_next_free); From 4b6d846e9a20ac8c9dd641d0ea875c28f331e241 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 12 Aug 2013 11:46:17 +0100 Subject: [PATCH 25/62] drm/i915: Drop the overzealous warning from i915_gem_set_cache_level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By our earlier reckoning, move from a snooped/llc setting to an uncached setting, leaves the CPU cache in a consistent state irrespective of our domain tracking - so we can forgo the warning about the lack of invalidation. Similarly for any writes posted to the snooped CPU domain, we know will be safely clflushed to the uncached PTEs after forcing the domain change. This WARN started to pop up with commit d46f1c3f1372e3a72fab97c60480aa4a1084387f Author: Chris Wilson AuthorDate: Thu Aug 8 14:41:06 2013 +0100 drm/i915: Allow the GPU to cache stolen memory Ville brought up a scenario where the interaction of a set_caching ioctl call from userspace on a scanout buffer (i.e. obj->pin_display is set) resulted in the code getting confused and not properly flushing stale cpu cachelines. Luckily we already prevent this by rejecting caching changes when obj->pin_count is set. Signed-off-by: Chris Wilson Cc: Ville Syrjälä Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68040 Tested-by: cancan,feng [danvet: Add buglink, bisect result and explain why Ville's scenario is already taken care of.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fd4872497a72..41dc04293584 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3440,7 +3440,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * Just set it to the CPU cache for now. */ WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); - WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU); old_read_domains = obj->base.read_domains; old_write_domain = obj->base.write_domain; From 8dc8a27c9733a41cda84e8c70da8313e1d54c4ae Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 2 Aug 2013 16:22:24 -0300 Subject: [PATCH 26/62] drm/i915: check the power well when redisabling VGA If the power well is disabled VGA is guaranteed to be disabled. This fixes unclaimed register messages that happen on suspend/resume. v2: Check the actual hw power well state instead of our own tracking to make sure VGA is _really_ off (in case the BIOS/KVMr has just its own request bit set). Requested by Ville. 
Note: Ville suggested whether it wouldn't be better to just enable the power well over a slightly longer time in our resume code, since we already do that. I tend to agree, but there's also the modeset force code in the lid notifier which _also_ eventually calls redisable_vga. We shouldn't ever need this on somewhat modern hw (everything with opregion essentially) but the code to bail out isn't there. Hence stick with this simple approach here for now. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=67517 Signed-off-by: Paulo Zanoni [danvet: Summarize the discussion around the resume sequence and lid notifier a bit.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a87bb93a0f63..f83316ec0b94 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10104,6 +10104,17 @@ void i915_redisable_vga(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; u32 vga_reg = i915_vgacntrl_reg(dev); + /* This function can be called both from intel_modeset_setup_hw_state or + * at a very early point in our resume sequence, where the power well + * structures are not yet restored. Since this function is at a very + * paranoid "someone might have enabled VGA while we were not looking" + * level, just check if the power well is enabled instead of trying to + * follow the "don't touch the power well if we don't need it" policy + * the rest of the driver uses. */ + if (HAS_POWER_WELL(dev) && + (I915_READ(HSW_PWR_WELL_DRIVER) & HSW_PWR_WELL_STATE) == 0) + return; + if (I915_READ(vga_reg) != VGA_DISP_DISABLE) { DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n"); i915_disable_vga(dev); From 6aedd1f539f51b7b0c3d6be0088c3541f9d2c294 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 2 Aug 2013 16:22:25 -0300 Subject: [PATCH 27/62] drm/i915: clarify Haswell power well bit names Whenever I need to work with the HSW_PWER_WELL_* register bits I have to look at the documentation to find out which bit is to request the power well and which one shows its current state. Rename the bits so I won't need to look the docs every time. 
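As assumed by the rename, bit 31 of the power well control registers is the software request and bit 30 is the hardware-reported state, so a read-back looks roughly like this (mirroring the intel_pm.c hunk below):

	tmp = I915_READ(HSW_PWR_WELL_DRIVER);
	is_enabled = tmp & HSW_PWR_WELL_STATE_ENABLED;		/* bit 30: hw says the well is powered */
	enable_requested = tmp & HSW_PWR_WELL_ENABLE_REQUEST;	/* bit 31: we asked for it to be powered */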
Signed-off-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 4 ++-- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 13 +++++++------ 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index fab94be89dfa..3c652eb7ee6c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4825,8 +4825,8 @@ #define HSW_PWR_WELL_DRIVER 0x45404 /* CTL2 */ #define HSW_PWR_WELL_KVMR 0x45408 /* CTL3 */ #define HSW_PWR_WELL_DEBUG 0x4540C /* CTL4 */ -#define HSW_PWR_WELL_ENABLE (1<<31) -#define HSW_PWR_WELL_STATE (1<<30) +#define HSW_PWR_WELL_ENABLE_REQUEST (1<<31) +#define HSW_PWR_WELL_STATE_ENABLED (1<<30) #define HSW_PWR_WELL_CTL5 0x45410 #define HSW_PWR_WELL_ENABLE_SINGLE_STEP (1<<31) #define HSW_PWR_WELL_PWR_GATE_OVERRIDE (1<<20) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f83316ec0b94..e600e1cfb6ea 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10112,7 +10112,7 @@ void i915_redisable_vga(struct drm_device *dev) * follow the "don't touch the power well if we don't need it" policy * the rest of the driver uses. */ if (HAS_POWER_WELL(dev) && - (I915_READ(HSW_PWR_WELL_DRIVER) & HSW_PWR_WELL_STATE) == 0) + (I915_READ(HSW_PWR_WELL_DRIVER) & HSW_PWR_WELL_STATE_ENABLED) == 0) return; if (I915_READ(vga_reg) != VGA_DISP_DISABLE) { diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 76150818b5cb..0d90064775c9 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5285,7 +5285,7 @@ bool intel_display_power_enabled(struct drm_device *dev, case POWER_DOMAIN_TRANSCODER_B: case POWER_DOMAIN_TRANSCODER_C: return I915_READ(HSW_PWR_WELL_DRIVER) == - (HSW_PWR_WELL_ENABLE | HSW_PWR_WELL_STATE); + (HSW_PWR_WELL_ENABLE_REQUEST | HSW_PWR_WELL_STATE_ENABLED); default: BUG(); } @@ -5298,17 +5298,18 @@ static void __intel_set_power_well(struct drm_device *dev, bool enable) uint32_t tmp; tmp = I915_READ(HSW_PWR_WELL_DRIVER); - is_enabled = tmp & HSW_PWR_WELL_STATE; - enable_requested = tmp & HSW_PWR_WELL_ENABLE; + is_enabled = tmp & HSW_PWR_WELL_STATE_ENABLED; + enable_requested = tmp & HSW_PWR_WELL_ENABLE_REQUEST; if (enable) { if (!enable_requested) - I915_WRITE(HSW_PWR_WELL_DRIVER, HSW_PWR_WELL_ENABLE); + I915_WRITE(HSW_PWR_WELL_DRIVER, + HSW_PWR_WELL_ENABLE_REQUEST); if (!is_enabled) { DRM_DEBUG_KMS("Enabling power well\n"); if (wait_for((I915_READ(HSW_PWR_WELL_DRIVER) & - HSW_PWR_WELL_STATE), 20)) + HSW_PWR_WELL_STATE_ENABLED), 20)) DRM_ERROR("Timeout enabling power well\n"); } } else { @@ -5410,7 +5411,7 @@ void intel_init_power_well(struct drm_device *dev) /* We're taking over the BIOS, so clear any requests made by it since * the driver is in charge now. */ - if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE) + if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST) I915_WRITE(HSW_PWR_WELL_BIOS, 0); } From fd547d25a8ac3f390fee4a689de86a64e3d65fe1 Mon Sep 17 00:00:00 2001 From: Vinit Azad Date: Wed, 14 Aug 2013 13:34:33 -0700 Subject: [PATCH 28/62] drm/i915: Only unmask required PM interrupts Un-masking all PM interrupts causes hardware to generate interrupts regardless of whether the interrupts are enabled on the DE side. Since turbo only need up/down threshold and rc6 timeout interrupt, mask all other interrupts bits to avoid unnecessary overhead/wake up. 
Note that our interrupt handler isn't being fired since we do set the IER bits properly (IIR bits aren't set). The overhead isn't because our driver is reacting to these interrupts, but because hardware keeps generating internal messages when PMINTRMSK doesn't mask out the up/down EI interrupts (which happen periodically). Change-Id: I6c947df6fd5f60584d39b9e8b8c89faa51a5e827 Signed-off-by: Vinit Azad [danvet: Add follow-up explanation of the precise effects from Vinit as a note to the commit message.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 0d90064775c9..7bc3f1783174 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3453,8 +3453,8 @@ static void gen6_enable_rps_interrupts(struct drm_device *dev) I915_WRITE(GEN6_PMIMR, I915_READ(GEN6_PMIMR) & ~GEN6_PM_RPS_EVENTS); I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS); spin_unlock_irq(&dev_priv->irq_lock); - /* unmask all PM interrupts */ - I915_WRITE(GEN6_PMINTRMSK, 0); + /* only unmask PM interrupts we need. Mask all others. */ + I915_WRITE(GEN6_PMINTRMSK, ~GEN6_PM_RPS_EVENTS); } static void gen6_enable_rps(struct drm_device *dev) From f214266c0d147c0a2608caafc43c832f1738f0a9 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 8 Aug 2013 09:10:37 +0200 Subject: [PATCH 29/62] drm/i915: unpin backing storage in dmabuf_unmap This fixes a WARN in i915_gem_free_object when the obj->pages_pin_count isn't 0. v2: Add locking to unmap, noticed by Chris Wilson. Note that even though we call unmap with our own dev->struct_mutex held that won't result in an immediate deadlock since we never go through the dma_buf interfaces for our own, reimported buffers. But it's still easy to blow up and anger lockdep, but that's already the case with our ->map implementation. Fixing this for real will involve per dma-buf ww mutex locking by the callers. And lots of fun. So go with the duct-tape approach for now. Cc: Chris Wilson Reported-by: Maarten Lankhorst Cc: Maarten Lankhorst Tested-by: Armin K. (v1) Acked-by: Maarten Lankhorst Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index dc53a527126b..9e6578330801 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -85,9 +85,17 @@ static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, struct sg_table *sg, enum dma_data_direction dir) { + struct drm_i915_gem_object *obj = attachment->dmabuf->priv; + + mutex_lock(&obj->base.dev->struct_mutex); + dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, dir); sg_free_table(sg); kfree(sg); + + i915_gem_object_unpin_pages(obj); + + mutex_unlock(&obj->base.dev->struct_mutex); } static void i915_gem_dmabuf_release(struct dma_buf *dma_buf) From 608806a549c656c925eeb253cbed768535f26e41 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 8 Aug 2013 09:10:38 +0200 Subject: [PATCH 30/62] drm/i915: explicit store base gem object in dma_buf->priv Makes it more obviously correct what tricks we play by reusing the drm prime release helper. 
Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 9e6578330801..938eb341054c 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -27,10 +27,15 @@ #include "i915_drv.h" #include +static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf) +{ + return to_intel_bo(buf->priv); +} + static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment, enum dma_data_direction dir) { - struct drm_i915_gem_object *obj = attachment->dmabuf->priv; + struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); struct sg_table *st; struct scatterlist *src, *dst; int ret, i; @@ -85,7 +90,7 @@ static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, struct sg_table *sg, enum dma_data_direction dir) { - struct drm_i915_gem_object *obj = attachment->dmabuf->priv; + struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); mutex_lock(&obj->base.dev->struct_mutex); @@ -111,7 +116,7 @@ static void i915_gem_dmabuf_release(struct dma_buf *dma_buf) static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf) { - struct drm_i915_gem_object *obj = dma_buf->priv; + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); struct drm_device *dev = obj->base.dev; struct sg_page_iter sg_iter; struct page **pages; @@ -159,7 +164,7 @@ error: static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) { - struct drm_i915_gem_object *obj = dma_buf->priv; + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); struct drm_device *dev = obj->base.dev; int ret; @@ -202,7 +207,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct * static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size_t length, enum dma_data_direction direction) { - struct drm_i915_gem_object *obj = dma_buf->priv; + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); struct drm_device *dev = obj->base.dev; int ret; bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE); @@ -233,9 +238,7 @@ static const struct dma_buf_ops i915_dmabuf_ops = { struct dma_buf *i915_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gem_obj, int flags) { - struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); - - return dma_buf_export(obj, &i915_dmabuf_ops, obj->base.size, flags); + return dma_buf_export(gem_obj, &i915_dmabuf_ops, gem_obj->size, flags); } static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) @@ -272,7 +275,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, /* is this one of own objects? */ if (dma_buf->ops == &i915_dmabuf_ops) { - obj = dma_buf->priv; + obj = dma_buf_to_obj(dma_buf); /* is it from our device? */ if (obj->base.dev == dev) { /* From 79f8dea13391f8220470997f9a5213ab5aa9f1c7 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Wed, 14 Aug 2013 14:40:37 -0300 Subject: [PATCH 31/62] drm/i915: enable the power well before module unload Our driver initialization doesn't seem to be ready to load when the power well is disabled: we hit a few "Unclaimed register" messages. So do just like we already do for the suspend/resume path: enable the power well before unloading. 
At some point we'll want to be able to survive suspend/resume and load/unload with the power well disabled, but for now let's just fix the regression. Regression introduced by the following commit: commit bf51d5e2cda5d36d98e4b46ac7fca9461e512c41 Author: Paulo Zanoni Date: Wed Jul 3 17:12:13 2013 -0300 drm/i915: switch disable_power_well default value to 1 Bug can be reproduced by running the "module_reload" script from intel-gpu-tools. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=67813 Signed-off-by: Paulo Zanoni Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index d4c176b7d76e..5a051eaab9ef 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1688,8 +1688,13 @@ int i915_driver_unload(struct drm_device *dev) intel_gpu_ips_teardown(); - if (HAS_POWER_WELL(dev)) + if (HAS_POWER_WELL(dev)) { + /* The i915.ko module is still not prepared to be loaded when + * the power well is not enabled, so just enable it in case + * we're going to unload/reload. */ + intel_set_power_well(dev, true); i915_remove_power_well(dev); + } i915_teardown_sysfs(dev); From 99486b8e6140da7721c932e708a6c17dc1dd970a Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Tue, 13 Aug 2013 16:23:17 -0700 Subject: [PATCH 32/62] i915: Add a Kconfig option to turn on i915.preliminary_hw_support by default When building kernels for a preliminary hardware target, having to add a kernel command-line option can prove inconvenient. Add a Kconfig option that changes the default of this option to 1. Signed-off-by: Josh Triplett Reviewed-by: Damien Lespiau [danvet: Pimp the Kconfig help text a bit as suggested by Damien in his 2nd review.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/Kconfig | 11 +++++++++++ drivers/gpu/drm/i915/i915_drv.c | 4 ++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index a7c54c843291..cd4246b480d4 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -168,6 +168,17 @@ config DRM_I915_KMS the driver to bind to PCI devices, which precludes loading things like intelfb. +config DRM_I915_PRELIMINARY_HW_SUPPORT + bool "Enable preliminary support for prerelease Intel hardware by default" + depends on DRM_I915 + help + Choose this option if you have prerelease Intel hardware and want the + i915 driver to support it by default. You can enable such support at + runtime with the module option i915.preliminary_hw_support=1; this + option changes the default for that module option. + + If in doubt, say "N". + config DRM_MGA tristate "Matrox g200/g400" depends on DRM && PCI diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 01d63a0435fb..fd9fb2c25691 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -122,10 +122,10 @@ int i915_enable_psr __read_mostly = 0; module_param_named(enable_psr, i915_enable_psr, int, 0600); MODULE_PARM_DESC(enable_psr, "Enable PSR (default: false)"); -unsigned int i915_preliminary_hw_support __read_mostly = 0; +unsigned int i915_preliminary_hw_support __read_mostly = IS_ENABLED(CONFIG_DRM_I915_PRELIMINARY_HW_SUPPORT); module_param_named(preliminary_hw_support, i915_preliminary_hw_support, int, 0600); MODULE_PARM_DESC(preliminary_hw_support, - "Enable preliminary hardware support. 
(default: false)"); + "Enable preliminary hardware support."); int i915_disable_power_well __read_mostly = 1; module_param_named(disable_power_well, i915_disable_power_well, int, 0600); From b25cb2f8828aca6204d9c93d4d677f27e3ae9fa6 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 14 Aug 2013 11:38:33 +0200 Subject: [PATCH 33/62] drm/i915: s/obj->exec_list/obj->obj_exec_link in debugfs To convert the execbuf code over to use vmas natively we need to shuffle the exec_list a bit. This patch here just prepares things with the debugfs code, which also uses the old exec_list list_head, newly called obj_exec_link. Signed-off-by: Ben Widawsky [danvet: Split out from Ben's big patch.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 12 ++++++------ drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 1 + 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index eb87865c20d4..4785d8c14654 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -195,9 +195,9 @@ static int obj_rank_by_stolen(void *priv, struct list_head *A, struct list_head *B) { struct drm_i915_gem_object *a = - container_of(A, struct drm_i915_gem_object, exec_list); + container_of(A, struct drm_i915_gem_object, obj_exec_link); struct drm_i915_gem_object *b = - container_of(B, struct drm_i915_gem_object, exec_list); + container_of(B, struct drm_i915_gem_object, obj_exec_link); return a->stolen->start - b->stolen->start; } @@ -221,7 +221,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) if (obj->stolen == NULL) continue; - list_add(&obj->exec_list, &stolen); + list_add(&obj->obj_exec_link, &stolen); total_obj_size += obj->base.size; total_gtt_size += i915_gem_obj_ggtt_size(obj); @@ -231,7 +231,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) if (obj->stolen == NULL) continue; - list_add(&obj->exec_list, &stolen); + list_add(&obj->obj_exec_link, &stolen); total_obj_size += obj->base.size; count++; @@ -239,11 +239,11 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) list_sort(NULL, &stolen, obj_rank_by_stolen); seq_puts(m, "Stolen:\n"); while (!list_empty(&stolen)) { - obj = list_first_entry(&stolen, typeof(*obj), exec_list); + obj = list_first_entry(&stolen, typeof(*obj), obj_exec_link); seq_puts(m, " "); describe_obj(m, obj); seq_putc(m, '\n'); - list_del_init(&obj->exec_list); + list_del_init(&obj->obj_exec_link); } mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2e7d5f9524f7..6532d9713b72 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1312,6 +1312,8 @@ struct drm_i915_gem_object { struct list_head global_list; struct list_head ring_list; + /** Used in execbuf to temporarily hold a ref */ + struct list_head obj_exec_link; /** This object's place in the batchbuffer or on the eviction list */ struct list_head exec_list; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 41dc04293584..ce40e27f8b42 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3992,6 +3992,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&obj->global_list); INIT_LIST_HEAD(&obj->ring_list); INIT_LIST_HEAD(&obj->exec_list); + INIT_LIST_HEAD(&obj->obj_exec_link); INIT_LIST_HEAD(&obj->vma_list); obj->ops = ops; From 
82a55ad1a0585e4e01a47f72fe81fb5a2d2c0fb1 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 14 Aug 2013 11:38:34 +0200 Subject: [PATCH 34/62] drm/i915: Switch eviction code to use vmas The execbuf wants to do relocations usings vmas, so we need a vma->exec_list. The eviction code also uses the old obj execbuf list for it's own book-keeping, but would really prefer to deal in vmas only. So switch it over to the new list. Again this is just a prep patch for the big execbuf vma conversion. Signed-off-by: Ben Widawsky [danvet: Split out from Ben's big execbuf vma patch.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 4 ++++ drivers/gpu/drm/i915/i915_gem.c | 1 + drivers/gpu/drm/i915/i915_gem_evict.c | 31 ++++++++++++--------------- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6532d9713b72..b2b9836c92e1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -563,6 +563,10 @@ struct i915_vma { struct list_head mm_list; struct list_head vma_link; /* Link in the object's VMA list */ + + /** This vma's place in the batchbuffer or on the eviction list */ + struct list_head exec_list; + }; struct i915_ctx_hang_stats { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ce40e27f8b42..ca29055ae206 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4132,6 +4132,7 @@ struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&vma->vma_link); INIT_LIST_HEAD(&vma->mm_list); + INIT_LIST_HEAD(&vma->exec_list); vma->vm = vm; vma->obj = obj; diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 425939b7d343..87875884770c 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -37,7 +37,7 @@ mark_free(struct i915_vma *vma, struct list_head *unwind) if (vma->obj->pin_count) return false; - list_add(&vma->obj->exec_list, unwind); + list_add(&vma->exec_list, unwind); return drm_mm_scan_add_block(&vma->node); } @@ -49,7 +49,6 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm, drm_i915_private_t *dev_priv = dev->dev_private; struct list_head eviction_list, unwind_list; struct i915_vma *vma; - struct drm_i915_gem_object *obj; int ret = 0; trace_i915_gem_evict(dev, min_size, alignment, mappable); @@ -104,14 +103,13 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm, none: /* Nothing found, clean up and bail out! */ while (!list_empty(&unwind_list)) { - obj = list_first_entry(&unwind_list, - struct drm_i915_gem_object, + vma = list_first_entry(&unwind_list, + struct i915_vma, exec_list); - vma = i915_gem_obj_to_vma(obj, vm); ret = drm_mm_scan_remove_block(&vma->node); BUG_ON(ret); - list_del_init(&obj->exec_list); + list_del_init(&vma->exec_list); } /* We expect the caller to unpin, evict all and try again, or give up. @@ -125,28 +123,27 @@ found: * temporary list. 
*/ INIT_LIST_HEAD(&eviction_list); while (!list_empty(&unwind_list)) { - obj = list_first_entry(&unwind_list, - struct drm_i915_gem_object, + vma = list_first_entry(&unwind_list, + struct i915_vma, exec_list); - vma = i915_gem_obj_to_vma(obj, vm); if (drm_mm_scan_remove_block(&vma->node)) { - list_move(&obj->exec_list, &eviction_list); - drm_gem_object_reference(&obj->base); + list_move(&vma->exec_list, &eviction_list); + drm_gem_object_reference(&vma->obj->base); continue; } - list_del_init(&obj->exec_list); + list_del_init(&vma->exec_list); } /* Unbinding will emit any required flushes */ while (!list_empty(&eviction_list)) { - obj = list_first_entry(&eviction_list, - struct drm_i915_gem_object, + vma = list_first_entry(&eviction_list, + struct i915_vma, exec_list); if (ret == 0) - ret = i915_vma_unbind(i915_gem_obj_to_vma(obj, vm)); + ret = i915_vma_unbind(vma); - list_del_init(&obj->exec_list); - drm_gem_object_unreference(&obj->base); + list_del_init(&vma->exec_list); + drm_gem_object_unreference(&vma->obj->base); } return ret; From accfef2e5a8f713bfa0c06696b5e10754686dc72 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 14 Aug 2013 11:38:35 +0200 Subject: [PATCH 35/62] drm/i915: prepare bind_to_vm for preallocated vma In the new execbuf code we want to track buffers using the vmas even before they're all properly mapped. Which means that bind_to_vm needs to deal with buffers which have preallocated vmas which aren't yet bound. This patch implements this prep work and adjusts our WARN/BUG checks. Signed-off-by: Ben Widawsky [danvet: Split out from Ben's big execbuf patch. Also move one BUG back to its original place to deflate the diff a notch.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 3 +++ drivers/gpu/drm/i915/i915_gem.c | 23 +++++++++++++++++------ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b2b9836c92e1..4bd66e9a0450 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1914,6 +1914,9 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, struct i915_address_space *vm); struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, struct i915_address_space *vm); +struct i915_vma * +i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, + struct i915_address_space *vm); /* Some GGTT VM helpers */ #define obj_to_ggtt(obj) \ (&((struct drm_i915_private *)(obj)->base.dev->dev_private)->gtt.base) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ca29055ae206..449575b85b31 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3124,9 +3124,6 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct i915_vma *vma; int ret; - if (WARN_ON(!list_empty(&obj->vma_list))) - return -EBUSY; - fence_size = i915_gem_get_gtt_size(dev, obj->base.size, obj->tiling_mode); @@ -3165,16 +3162,17 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, i915_gem_object_pin_pages(obj); - /* FIXME: For now we only ever use 1 VMA per object */ BUG_ON(!i915_is_ggtt(vm)); - WARN_ON(!list_empty(&obj->vma_list)); - vma = i915_gem_vma_create(obj, vm); + vma = i915_gem_obj_lookup_or_create_vma(obj, vm); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err_unpin; } + /* For now we only ever use 1 vma per object */ + WARN_ON(!list_is_singular(&obj->vma_list)); + search_free: ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, size, alignment, 
@@ -4882,3 +4880,16 @@ struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, return NULL; } + +struct i915_vma * +i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, + struct i915_address_space *vm) +{ + struct i915_vma *vma; + + vma = i915_gem_obj_to_vma(obj, vm); + if (!vma) + vma = i915_gem_vma_create(obj, vm); + + return vma; +} From 8637b407cf1740c52a01b9fc0cf506f31e225151 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 16 Aug 2013 13:29:33 -0700 Subject: [PATCH 36/62] drm/i915/vma: Correct use after free in eviction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vma will [possibly] be destroyed during unbind in eviction. Immediately after this, we try to delete the list entry. Chris and Ville did the debug on this before I woke up, I just get to take credit for the fix :p For future reference the Oops that Mika reported: [ 403.472448] BUG: unable to handle kernel paging request at 6b6b6b6b [ 403.472473] IP: [] __list_del_entry+0x20/0xe0 [ 403.472514] *pdpt = 000000002e89c001 *pde = 0000000000000000 [ 403.472556] Oops: 0000 [#1] SMP [ 403.472582] Modules linked in: mxm_wmi snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_intel snd_hda_codec snd_hwdep snd_pcm snd_seq_midi snd_rawmidi psmouse snd_seq_midi_event snd_seq serio_raw snd_timer snd_seq_device snd soundcore snd_page_alloc wmi bnep rfcomm bluetooth mac_hid parport_pc ppdev lp parport usbhid dm_crypt firewire_ohci firewire_core crc_itu_t i915 drm_kms_helper e1000e ptp drm i2c_algo_bit pps_core xhci_hcd video [ 403.472895] CPU: 2 PID: 1940 Comm: Xorg Not tainted 3.11.0-rc2+ #827 [ 403.472938] Hardware name: /DZ77BH-55K, BIOS BHZ7710H.86A.0070.2012.0416.2117 04/16/2012 [ 403.473002] task: ec866c00 ti: ee6a2000 task.ti: ee6a2000 [ 403.473039] EIP: 0060:[] EFLAGS: 00013202 CPU: 2 [ 403.473078] EIP is at __list_del_entry+0x20/0xe0 [ 403.473109] EAX: f016d9bc EBX: f016d9bc ECX: 6b6b6b6b EDX: 6b6b6b6b [ 403.473151] ESI: 00000000 EDI: ee6a3c90 EBP: ee6a3c60 ESP: ee6a3c48 [ 403.473193] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 [ 403.473230] CR0: 80050033 CR2: 6b6b6b6b CR3: 2ec43000 CR4: 001407f0 [ 403.473271] Stack: [ 403.473285] f63b2ff0 f61f98c0 f61f8000 f016d9bc 00000000 f016d9bc ee6a3cac f8519a4a [ 403.473347] 00000000 00000000 10000000 f61f8000 0100a000 10000000 00000001 008ca000 [ 403.473410] f64ee840 f61f98c0 f016d9bc f016dcec ee6a3c98 ee6a3c98 f61f98c0 dcc58f00 [ 403.473472] Call Trace: [ 403.473509] [] i915_gem_evict_something+0x17a/0x2d0 [i915] [ 403.473567] [] i915_gem_object_pin+0x271/0x660 [i915] [ 403.473622] [] ? i915_ggtt_clear_range+0x20/0x20 [i915] [ 403.473676] [] i915_gem_object_pin_to_display_plane+0xda/0x190 [i915] [ 403.473742] [] intel_pin_and_fence_fb_obj+0xba/0x140 [i915] [ 403.473800] [] intel_gen7_queue_flip+0x30/0x1c0 [i915] [ 403.473856] [] intel_crtc_page_flip+0x1a0/0x320 [i915] [ 403.473911] [] ? drm_framebuffer_reference+0x39/0x80 [drm] [ 403.473965] [] drm_mode_page_flip_ioctl+0x28b/0x320 [drm] [ 403.474018] [] drm_ioctl+0x4b8/0x560 [drm] [ 403.474064] [] ? drm_mode_gamma_get_ioctl+0xd0/0xd0 [drm] [ 403.474113] [] ? do_sync_read+0x6a/0xa0 [ 403.474154] [] ? drm_copy_field+0x80/0x80 [drm] [ 403.474193] [] do_vfs_ioctl+0x7c/0x5b0 [ 403.474228] [] ? vfs_read+0xef/0x160 [ 403.474263] [] ? 
ktime_get_ts+0x4b/0x120 [ 403.474298] [] SyS_ioctl+0x97/0xa0 [ 403.474330] [] sysenter_do_call+0x12/0x22 [ 403.474364] Code: 55 f4 8b 45 f8 e9 75 ff ff ff 90 55 89 e5 53 83 ec 14 8b 08 8b 50 04 81 f9 00 01 10 00 74 24 81 fa 00 02 20 00 0f 84 8e 00 00 00 <8b> 1a 39 d8 75 62 8b 59 04 39 d8 75 35 89 51 04 89 0a 83 c4 14 [ 403.474566] EIP: [] __list_del_entry+0x20/0xe0 SS:ESP 0068:ee6a3c48 [ 403.476513] CR2: 000000006b6b6b6b v2: Missed the drm_object_unreference use after free (Ville) Daniel Vetter writes: Reported-by: Mika Kuoppala Cc: Ville Syrjälä Cc: Chris Wilson Signed-off-by: Ben Widawsky Reviewed-by: Chris Wilson [danvet: Add the Oops from Mika to the commit message.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_evict.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 87875884770c..91b700155850 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -136,14 +136,17 @@ found: /* Unbinding will emit any required flushes */ while (!list_empty(&eviction_list)) { + struct drm_gem_object *obj; vma = list_first_entry(&eviction_list, struct i915_vma, exec_list); + + obj = &vma->obj->base; + list_del_init(&vma->exec_list); if (ret == 0) ret = i915_vma_unbind(vma); - list_del_init(&vma->exec_list); - drm_gem_object_unreference(&vma->obj->base); + drm_gem_object_unreference(obj); } return ret; From 139ccd3fb12b3d17a773d2d61140f955a47fa470 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 19 Aug 2013 11:04:55 -0700 Subject: [PATCH 37/62] drm/i915: make IVB FDI training match spec v3 The existing code was trying different vswing and preemphasis settings in the wrong place, and wasn't trying them enough. So add a loop to walk through them, properly disabling FDI TX and RX in between if a failure is detected. 
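The new flow is, roughly (a condensed sketch of the loop in the diff below, not the literal code):

	for (j = 0; j < ARRAY_SIZE(snb_b_fdi_train_param) * 2; j++) {
		/* disable FDI TX and RX first, in case this is a retry */
		/* program vswing/preemphasis from snb_b_fdi_train_param[j / 2], enable TX and RX */
		/* poll FDI_RX_IIR for bit lock; if it never locks, try the next setting */
		/* switch to training pattern 2 and poll for symbol lock; on success, goto train_done */
	}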
v2: remove unneeded reg writes, add delays around bit lock checks (Jesse) v3: fix TX and RX disable per spec (Paulo) fix delays per spec (Paulo) make RX symbol lock check match TX bit lock check (Paulo) Signed-off-by: Jesse Barnes Reviewed-by: Paulo Zanoni Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=51983 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 164 ++++++++++++++------------- 1 file changed, 83 insertions(+), 81 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e600e1cfb6ea..7a40427823c7 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2597,7 +2597,7 @@ static void ivb_manual_fdi_link_train(struct drm_crtc *crtc) struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; - u32 reg, temp, i; + u32 reg, temp, i, j; /* Train 1: umask FDI RX Interrupt symbol_lock and bit_lock bit for train result */ @@ -2613,97 +2613,99 @@ static void ivb_manual_fdi_link_train(struct drm_crtc *crtc) DRM_DEBUG_KMS("FDI_RX_IIR before link train 0x%x\n", I915_READ(FDI_RX_IIR(pipe))); - /* enable CPU FDI TX and PCH FDI RX */ - reg = FDI_TX_CTL(pipe); - temp = I915_READ(reg); - temp &= ~FDI_DP_PORT_WIDTH_MASK; - temp |= FDI_DP_PORT_WIDTH(intel_crtc->config.fdi_lanes); - temp &= ~(FDI_LINK_TRAIN_AUTO | FDI_LINK_TRAIN_NONE_IVB); - temp |= FDI_LINK_TRAIN_PATTERN_1_IVB; - temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; - temp |= FDI_LINK_TRAIN_400MV_0DB_SNB_B; - temp |= FDI_COMPOSITE_SYNC; - I915_WRITE(reg, temp | FDI_TX_ENABLE); - - I915_WRITE(FDI_RX_MISC(pipe), - FDI_RX_TP1_TO_TP2_48 | FDI_RX_FDI_DELAY_90); - - reg = FDI_RX_CTL(pipe); - temp = I915_READ(reg); - temp &= ~FDI_LINK_TRAIN_AUTO; - temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; - temp |= FDI_LINK_TRAIN_PATTERN_1_CPT; - temp |= FDI_COMPOSITE_SYNC; - I915_WRITE(reg, temp | FDI_RX_ENABLE); - - POSTING_READ(reg); - udelay(150); - - for (i = 0; i < 4; i++) { + /* Try each vswing and preemphasis setting twice before moving on */ + for (j = 0; j < ARRAY_SIZE(snb_b_fdi_train_param) * 2; j++) { + /* disable first in case we need to retry */ reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); + temp &= ~(FDI_LINK_TRAIN_AUTO | FDI_LINK_TRAIN_NONE_IVB); + temp &= ~FDI_TX_ENABLE; + I915_WRITE(reg, temp); + + reg = FDI_RX_CTL(pipe); + temp = I915_READ(reg); + temp &= ~FDI_LINK_TRAIN_AUTO; + temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; + temp &= ~FDI_RX_ENABLE; + I915_WRITE(reg, temp); + + /* enable CPU FDI TX and PCH FDI RX */ + reg = FDI_TX_CTL(pipe); + temp = I915_READ(reg); + temp &= ~FDI_DP_PORT_WIDTH_MASK; + temp |= FDI_DP_PORT_WIDTH(intel_crtc->config.fdi_lanes); + temp |= FDI_LINK_TRAIN_PATTERN_1_IVB; temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; - temp |= snb_b_fdi_train_param[i]; + temp |= snb_b_fdi_train_param[j/2]; + temp |= FDI_COMPOSITE_SYNC; + I915_WRITE(reg, temp | FDI_TX_ENABLE); + + I915_WRITE(FDI_RX_MISC(pipe), + FDI_RX_TP1_TO_TP2_48 | FDI_RX_FDI_DELAY_90); + + reg = FDI_RX_CTL(pipe); + temp = I915_READ(reg); + temp |= FDI_LINK_TRAIN_PATTERN_1_CPT; + temp |= FDI_COMPOSITE_SYNC; + I915_WRITE(reg, temp | FDI_RX_ENABLE); + + POSTING_READ(reg); + udelay(1); /* should be 0.5us */ + + for (i = 0; i < 4; i++) { + reg = FDI_RX_IIR(pipe); + temp = I915_READ(reg); + DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp); + + if (temp & FDI_RX_BIT_LOCK || + (I915_READ(reg) & FDI_RX_BIT_LOCK)) { + I915_WRITE(reg, temp | FDI_RX_BIT_LOCK); + DRM_DEBUG_KMS("FDI train 1 done, level %i.\n", 
+ i); + break; + } + udelay(1); /* should be 0.5us */ + } + if (i == 4) { + DRM_DEBUG_KMS("FDI train 1 fail on vswing %d\n", j / 2); + continue; + } + + /* Train 2 */ + reg = FDI_TX_CTL(pipe); + temp = I915_READ(reg); + temp &= ~FDI_LINK_TRAIN_NONE_IVB; + temp |= FDI_LINK_TRAIN_PATTERN_2_IVB; + I915_WRITE(reg, temp); + + reg = FDI_RX_CTL(pipe); + temp = I915_READ(reg); + temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; + temp |= FDI_LINK_TRAIN_PATTERN_2_CPT; I915_WRITE(reg, temp); POSTING_READ(reg); - udelay(500); + udelay(2); /* should be 1.5us */ - reg = FDI_RX_IIR(pipe); - temp = I915_READ(reg); - DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp); + for (i = 0; i < 4; i++) { + reg = FDI_RX_IIR(pipe); + temp = I915_READ(reg); + DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp); - if (temp & FDI_RX_BIT_LOCK || - (I915_READ(reg) & FDI_RX_BIT_LOCK)) { - I915_WRITE(reg, temp | FDI_RX_BIT_LOCK); - DRM_DEBUG_KMS("FDI train 1 done, level %i.\n", i); - break; + if (temp & FDI_RX_SYMBOL_LOCK || + (I915_READ(reg) & FDI_RX_SYMBOL_LOCK)) { + I915_WRITE(reg, temp | FDI_RX_SYMBOL_LOCK); + DRM_DEBUG_KMS("FDI train 2 done, level %i.\n", + i); + goto train_done; + } + udelay(2); /* should be 1.5us */ } + if (i == 4) + DRM_DEBUG_KMS("FDI train 2 fail on vswing %d\n", j / 2); } - if (i == 4) - DRM_ERROR("FDI train 1 fail!\n"); - - /* Train 2 */ - reg = FDI_TX_CTL(pipe); - temp = I915_READ(reg); - temp &= ~FDI_LINK_TRAIN_NONE_IVB; - temp |= FDI_LINK_TRAIN_PATTERN_2_IVB; - temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; - temp |= FDI_LINK_TRAIN_400MV_0DB_SNB_B; - I915_WRITE(reg, temp); - - reg = FDI_RX_CTL(pipe); - temp = I915_READ(reg); - temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; - temp |= FDI_LINK_TRAIN_PATTERN_2_CPT; - I915_WRITE(reg, temp); - - POSTING_READ(reg); - udelay(150); - - for (i = 0; i < 4; i++) { - reg = FDI_TX_CTL(pipe); - temp = I915_READ(reg); - temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; - temp |= snb_b_fdi_train_param[i]; - I915_WRITE(reg, temp); - - POSTING_READ(reg); - udelay(500); - - reg = FDI_RX_IIR(pipe); - temp = I915_READ(reg); - DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp); - - if (temp & FDI_RX_SYMBOL_LOCK) { - I915_WRITE(reg, temp | FDI_RX_SYMBOL_LOCK); - DRM_DEBUG_KMS("FDI train 2 done, level %i.\n", i); - break; - } - } - if (i == 4) - DRM_ERROR("FDI train 2 fail!\n"); +train_done: DRM_DEBUG_KMS("FDI train done.\n"); } From 8254860096df085d633207d4d68550bb2ca29f17 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Mon, 19 Aug 2013 19:32:00 +0100 Subject: [PATCH 38/62] drm/i915: Remove DSPARB_HWCONTROL() This define hasn't been used since: commit 652c393a3368af84359da37c45afc35a91144960 Author: Jesse Barnes Date: Mon Aug 17 13:31:43 2009 -0700 drm/i915: add dynamic clock frequency control Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4bd66e9a0450..15aede2eb458 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1562,8 +1562,6 @@ struct drm_i915_file_private { #define SUPPORTS_EDP(dev) (IS_IRONLAKE_M(dev)) #define SUPPORTS_TV(dev) (INTEL_INFO(dev)->supports_tv) #define I915_HAS_HOTPLUG(dev) (INTEL_INFO(dev)->has_hotplug) -/* dsparb controlled by hw only */ -#define DSPARB_HWCONTROL(dev) (IS_G4X(dev) || IS_IRONLAKE(dev)) #define HAS_FW_BLC(dev) (INTEL_INFO(dev)->gen > 2) #define HAS_PIPE_CXSR(dev) (INTEL_INFO(dev)->has_pipe_cxsr) From fdaa930bee14abe5ed1d1aead5bc6a9a5660ccbf Mon Sep 17 00:00:00 2001 From: Damien 
Lespiau Date: Mon, 19 Aug 2013 19:32:01 +0100 Subject: [PATCH 39/62] drm/i915: Remove HAS_PIPE_CONTROL() The code using this was removed in: commit 88f23b8fa3e6357c423af24ec31c661fc12f884b Author: Chris Wilson Date: Sun Dec 5 15:08:31 2010 +0000 drm/i915: Avoid using PIPE_CONTROL on Ironlake Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 15aede2eb458..29cccf3f0918 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1569,8 +1569,6 @@ struct drm_i915_file_private { #define HAS_IPS(dev) (IS_ULT(dev)) -#define HAS_PIPE_CONTROL(dev) (INTEL_INFO(dev)->gen >= 5) - #define HAS_DDI(dev) (INTEL_INFO(dev)->has_ddi) #define HAS_POWER_WELL(dev) (IS_HASWELL(dev)) #define HAS_FPGA_DBG_UNCLAIMED(dev) (INTEL_INFO(dev)->has_fpga_dbg) From 3abdb33410d8b130437613a2fe3d5bf667ca34da Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Mon, 19 Aug 2013 19:32:02 +0100 Subject: [PATCH 40/62] drm: Remove IS_IRONLAKE_D() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This define hasn't been used since: commit cfdf1fa23f4074c9f8766dc67a928bbf680b1ac9 Author: Kristian Høgsberg Date: Wed Dec 16 15:16:16 2009 -0500 drm/i915: Implement IS_* macros using static tables Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 29cccf3f0918..9bf28010c231 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1505,7 +1505,6 @@ struct drm_i915_file_private { #define IS_PINEVIEW_M(dev) ((dev)->pci_device == 0xa011) #define IS_PINEVIEW(dev) (INTEL_INFO(dev)->is_pineview) #define IS_G33(dev) (INTEL_INFO(dev)->is_g33) -#define IS_IRONLAKE_D(dev) ((dev)->pci_device == 0x0042) #define IS_IRONLAKE_M(dev) ((dev)->pci_device == 0x0046) #define IS_IVYBRIDGE(dev) (INTEL_INFO(dev)->is_ivybridge) #define IS_IVB_GT1(dev) ((dev)->pci_device == 0x0156 || \ From e3ce7633ba38a97c2203ab60f381ce1642940328 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Mon, 19 Aug 2013 19:32:03 +0100 Subject: [PATCH 41/62] drm/i915: Remove I915_READ_{NOPID, SYNC_0, SYNC_1})() The code directly uses the registers and ring->mmio_base. 
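That is, callers open-code the register access, e.g. (illustrative, matching the usage already present elsewhere in this series):

	I915_READ(RING_SYNC_0(ring->mmio_base));

instead of going through the removed I915_READ_SYNC_0(ring) wrapper.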
Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 5e6be842d225..432ad5311ba6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -33,10 +33,6 @@ struct intel_hw_status_page { #define I915_READ_IMR(ring) I915_READ(RING_IMR((ring)->mmio_base)) #define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val) -#define I915_READ_NOPID(ring) I915_READ(RING_NOPID((ring)->mmio_base)) -#define I915_READ_SYNC_0(ring) I915_READ(RING_SYNC_0((ring)->mmio_base)) -#define I915_READ_SYNC_1(ring) I915_READ(RING_SYNC_1((ring)->mmio_base)) - enum intel_ring_hangcheck_action { HANGCHECK_WAIT, HANGCHECK_ACTIVE, From ec013e7f491cceef0e87190a3c6b132ce49f7ce4 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 20 Aug 2013 10:29:23 +0100 Subject: [PATCH 42/62] drm/i915: Expose energy counter on SNB+ through debugfs On SNB and IVB, there's an MSR (also exposed through MCHBAR) we can use to read out the amount of energy used over time. Expose this in sysfs to make it easy to do power comparisons with different configurations. If the platform supports it, the file will show up under the drm/card0/power subdirectory of the PCI device in sysfs as gt_energy_uJ. The value in the file is a running total of energy (in microjoules) consumed by the graphics device. v2: move to sysfs (Ben, Daniel) expose a simple value (Chris) drop unrelated hunk (Ben) Signed-off-by: Jesse Barnes v3: by Ben Tied it into existing rc6 sysfs entries and named that a more generic "power attrs." Fixed rebase conflicts. Signed-off-by: Ben Widawsky v4: Since RAPL is a real driver that already exists to serve power monitoring, place our entry in debugfs. This gives me a fallback location for systems that do not expose it otherwise. 
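For reference, the unit handling in the new debugfs entry boils down to the following (a simplified sketch of the hunk below; bits 12:8 of MSR_RAPL_POWER_UNIT hold the energy-status exponent):

	rdmsrl(MSR_RAPL_POWER_UNIT, power);
	power = (power & 0x1f00) >> 8;		/* energy status units exponent */
	units = 1000000 / (1 << power);		/* microjoules per counter increment */
	power = I915_READ(MCH_SECP_NRG_STTS);	/* running energy counter */
	power *= units;				/* total energy in uJ */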
Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 23 +++++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 2 ++ 2 files changed, 25 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 4785d8c14654..236d97e51c3a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "intel_drv.h" #include "intel_ringbuffer.h" @@ -1769,6 +1770,27 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) return 0; } +static int i915_energy_uJ(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m->private; + struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + u64 power; + u32 units; + + if (INTEL_INFO(dev)->gen < 6) + return -ENODEV; + + rdmsrl(MSR_RAPL_POWER_UNIT, power); + power = (power & 0x1f00) >> 8; + units = 1000000 / (1 << power); /* convert to uJ */ + power = I915_READ(MCH_SECP_NRG_STTS); + power *= units; + + seq_printf(m, "%llu", (long long unsigned)power); + return 0; +} + static int i915_wedged_get(void *data, u64 *val) { @@ -2208,6 +2230,7 @@ static struct drm_info_list i915_debugfs_list[] = { {"i915_dpio", i915_dpio_info, 0}, {"i915_llc", i915_llc, 0}, {"i915_edp_psr_status", i915_edp_psr_status, 0}, + {"i915_energy_uJ", i915_energy_uJ, 0}, }; #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3c652eb7ee6c..c4c509895826 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1447,6 +1447,8 @@ #define MCH_SSKPD_WM0_MASK 0x3f #define MCH_SSKPD_WM0_VAL 0xc +#define MCH_SECP_NRG_STTS (MCHBAR_MIRROR_BASE_SNB + 0x592c) + /* Clocking configuration register */ #define CLKCFG 0x10c00 #define CLKCFG_FSB_400 (5 << 0) /* hrawclk 100 */ From a40066412cc2ace1c1299e7a4d7a81dc33395b6f Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Tue, 6 Aug 2013 18:57:11 -0300 Subject: [PATCH 43/62] drm/i915: add the FCLK case to intel_ddi_get_cdclk_freq We already have code to disable LCPLL and switch to FCLK, so we need this too. We still don't call the code to disable LCPLL, but we'll call it when we add support for Package C8+. Signed-off-by: Paulo Zanoni Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index b8c096b4a1de..63aca49d11a8 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1139,10 +1139,13 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder) int intel_ddi_get_cdclk_freq(struct drm_i915_private *dev_priv) { - if (I915_READ(HSW_FUSE_STRAP) & HSW_CDCLK_LIMIT) + uint32_t lcpll = I915_READ(LCPLL_CTL); + + if (lcpll & LCPLL_CD_SOURCE_FCLK) + return 800000; + else if (I915_READ(HSW_FUSE_STRAP) & HSW_CDCLK_LIMIT) return 450000; - else if ((I915_READ(LCPLL_CTL) & LCPLL_CLK_FREQ_MASK) == - LCPLL_CLK_FREQ_450) + else if ((lcpll & LCPLL_CLK_FREQ_MASK) == LCPLL_CLK_FREQ_450) return 450000; else if (IS_ULT(dev_priv->dev)) return 337500; From 43eaea131823c5ca13d03364e61bd15f0b22a0f7 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Tue, 6 Aug 2013 18:57:12 -0300 Subject: [PATCH 44/62] drm/i915: wrap GTIMR changes Just like the functions that touch DEIMR and SDEIMR, but for GTIMR. 
The new functions contain a POSTING_READ(GTIMR) which was not present at the 2 callers inside i915_irq.c. The implementation is based on ibx_display_interrupt_update. Signed-off-by: Paulo Zanoni Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 34 ++++++++++++++++++++++--- drivers/gpu/drm/i915/intel_drv.h | 3 +++ drivers/gpu/drm/i915/intel_ringbuffer.c | 22 +++++----------- 3 files changed, 39 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 28d57477aa42..6bd4508666d2 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -104,6 +104,34 @@ ironlake_disable_display_irq(drm_i915_private_t *dev_priv, u32 mask) } } +/** + * ilk_update_gt_irq - update GTIMR + * @dev_priv: driver private + * @interrupt_mask: mask of interrupt bits to update + * @enabled_irq_mask: mask of interrupt bits to enable + */ +static void ilk_update_gt_irq(struct drm_i915_private *dev_priv, + uint32_t interrupt_mask, + uint32_t enabled_irq_mask) +{ + assert_spin_locked(&dev_priv->irq_lock); + + dev_priv->gt_irq_mask &= ~interrupt_mask; + dev_priv->gt_irq_mask |= (~enabled_irq_mask & interrupt_mask); + I915_WRITE(GTIMR, dev_priv->gt_irq_mask); + POSTING_READ(GTIMR); +} + +void ilk_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask) +{ + ilk_update_gt_irq(dev_priv, mask, mask); +} + +void ilk_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask) +{ + ilk_update_gt_irq(dev_priv, mask, 0); +} + static bool ivb_can_enable_err_int(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -806,8 +834,7 @@ static void ivybridge_parity_work(struct work_struct *work) I915_WRITE(GEN7_MISCCPCTL, misccpctl); spin_lock_irqsave(&dev_priv->irq_lock, flags); - dev_priv->gt_irq_mask &= ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT; - I915_WRITE(GTIMR, dev_priv->gt_irq_mask); + ilk_enable_gt_irq(dev_priv, GT_RENDER_L3_PARITY_ERROR_INTERRUPT); spin_unlock_irqrestore(&dev_priv->irq_lock, flags); mutex_unlock(&dev_priv->dev->struct_mutex); @@ -837,8 +864,7 @@ static void ivybridge_parity_error_irq_handler(struct drm_device *dev) return; spin_lock(&dev_priv->irq_lock); - dev_priv->gt_irq_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT; - I915_WRITE(GTIMR, dev_priv->gt_irq_mask); + ilk_disable_gt_irq(dev_priv, GT_RENDER_L3_PARITY_ERROR_INTERRUPT); spin_unlock(&dev_priv->irq_lock); queue_work(dev_priv->wq, &dev_priv->l3_parity.error_work); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 01455aa8b8bb..a8462064714c 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -778,5 +778,8 @@ extern void intel_edp_psr_update(struct drm_device *dev); extern void hsw_disable_lcpll(struct drm_i915_private *dev_priv, bool switch_to_fclk, bool allow_power_down); extern void hsw_restore_lcpll(struct drm_i915_private *dev_priv); +extern void ilk_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); +extern void ilk_disable_gt_irq(struct drm_i915_private *dev_priv, + uint32_t mask); #endif /* __INTEL_DRV_H__ */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 34777168f700..2e370804248f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -836,11 +836,8 @@ gen5_ring_get_irq(struct intel_ring_buffer *ring) return false; spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (ring->irq_refcount++ == 0) { - 
dev_priv->gt_irq_mask &= ~ring->irq_enable_mask; - I915_WRITE(GTIMR, dev_priv->gt_irq_mask); - POSTING_READ(GTIMR); - } + if (ring->irq_refcount++ == 0) + ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask); spin_unlock_irqrestore(&dev_priv->irq_lock, flags); return true; @@ -854,11 +851,8 @@ gen5_ring_put_irq(struct intel_ring_buffer *ring) unsigned long flags; spin_lock_irqsave(&dev_priv->irq_lock, flags); - if (--ring->irq_refcount == 0) { - dev_priv->gt_irq_mask |= ring->irq_enable_mask; - I915_WRITE(GTIMR, dev_priv->gt_irq_mask); - POSTING_READ(GTIMR); - } + if (--ring->irq_refcount == 0) + ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask); spin_unlock_irqrestore(&dev_priv->irq_lock, flags); } @@ -1028,9 +1022,7 @@ gen6_ring_get_irq(struct intel_ring_buffer *ring) GT_RENDER_L3_PARITY_ERROR_INTERRUPT)); else I915_WRITE_IMR(ring, ~ring->irq_enable_mask); - dev_priv->gt_irq_mask &= ~ring->irq_enable_mask; - I915_WRITE(GTIMR, dev_priv->gt_irq_mask); - POSTING_READ(GTIMR); + ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask); } spin_unlock_irqrestore(&dev_priv->irq_lock, flags); @@ -1051,9 +1043,7 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring) ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT); else I915_WRITE_IMR(ring, ~0); - dev_priv->gt_irq_mask |= ring->irq_enable_mask; - I915_WRITE(GTIMR, dev_priv->gt_irq_mask); - POSTING_READ(GTIMR); + ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask); } spin_unlock_irqrestore(&dev_priv->irq_lock, flags); From edbfdb456053d0738e6b06a3827ead4158bfc918 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Tue, 6 Aug 2013 18:57:13 -0300 Subject: [PATCH 45/62] drm/i915: wrap GEN6_PMIMR changes Just like we're doing with the other IMR changes. One of the functional changes is that not every caller was doing the POSTING_READ. 
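As an editorial aside for readers new to these IMR helpers (this sketch is illustration only, not part of the patch): both ilk_update_gt_irq() and snb_update_pm_irq() share a two-mask convention where interrupt_mask picks which IMR bits to touch and enabled_irq_mask says which of those end up unmasked; a set IMR bit disables the interrupt. A minimal stand-alone model of that bookkeeping:

        #include <stdint.h>
        #include <stdio.h>

        /* Toy model of the IMR bookkeeping, not driver code: a set bit masks
         * (disables) the corresponding interrupt. */
        static uint32_t imr = 0xffffffff;       /* start from an all-masked IMR */

        static void update_irq(uint32_t interrupt_mask, uint32_t enabled_irq_mask)
        {
                imr &= ~interrupt_mask;                         /* unmask the selected bits... */
                imr |= ~enabled_irq_mask & interrupt_mask;      /* ...then re-mask those not enabled */
        }

        int main(void)
        {
                update_irq(0x3, 0x3);   /* "enable", like ilk_enable_gt_irq(dev_priv, 0x3) */
                printf("after enable:  0x%08x\n", imr);         /* 0xfffffffc */
                update_irq(0x3, 0);     /* "disable", like ilk_disable_gt_irq(dev_priv, 0x3) */
                printf("after disable: 0x%08x\n", imr);         /* 0xffffffff */
                return 0;
        }

The enable/disable wrappers then reduce to update(mask, mask) and update(mask, 0), exactly as the ilk_*/snb_* wrappers in these patches do.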
Signed-off-by: Paulo Zanoni Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 47 +++++++++++++++++++++---- drivers/gpu/drm/i915/intel_drv.h | 3 ++ drivers/gpu/drm/i915/intel_pm.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 8 ++--- 4 files changed, 46 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 6bd4508666d2..af5c335a69db 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -132,6 +132,41 @@ void ilk_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask) ilk_update_gt_irq(dev_priv, mask, 0); } +/** + * snb_update_pm_irq - update GEN6_PMIMR + * @dev_priv: driver private + * @interrupt_mask: mask of interrupt bits to update + * @enabled_irq_mask: mask of interrupt bits to enable + */ +static void snb_update_pm_irq(struct drm_i915_private *dev_priv, + uint32_t interrupt_mask, + uint32_t enabled_irq_mask) +{ + uint32_t pmimr = I915_READ(GEN6_PMIMR); + pmimr &= ~interrupt_mask; + pmimr |= (~enabled_irq_mask & interrupt_mask); + + assert_spin_locked(&dev_priv->irq_lock); + + I915_WRITE(GEN6_PMIMR, pmimr); + POSTING_READ(GEN6_PMIMR); +} + +void snb_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask) +{ + snb_update_pm_irq(dev_priv, mask, mask); +} + +void snb_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask) +{ + snb_update_pm_irq(dev_priv, mask, 0); +} + +static void snb_set_pm_irq(struct drm_i915_private *dev_priv, uint32_t val) +{ + snb_update_pm_irq(dev_priv, 0xffffffff, ~val); +} + static bool ivb_can_enable_err_int(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -739,15 +774,14 @@ static void gen6_pm_rps_work(struct work_struct *work) { drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t, rps.work); - u32 pm_iir, pm_imr; + u32 pm_iir; u8 new_delay; spin_lock_irq(&dev_priv->irq_lock); pm_iir = dev_priv->rps.pm_iir; dev_priv->rps.pm_iir = 0; - pm_imr = I915_READ(GEN6_PMIMR); /* Make sure not to corrupt PMIMR state used by ringbuffer code */ - I915_WRITE(GEN6_PMIMR, pm_imr & ~GEN6_PM_RPS_EVENTS); + snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS); spin_unlock_irq(&dev_priv->irq_lock); if ((pm_iir & GEN6_PM_RPS_EVENTS) == 0) @@ -921,8 +955,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, spin_lock(&dev_priv->irq_lock); dev_priv->rps.pm_iir |= pm_iir; - I915_WRITE(GEN6_PMIMR, dev_priv->rps.pm_iir); - POSTING_READ(GEN6_PMIMR); + snb_set_pm_irq(dev_priv, dev_priv->rps.pm_iir); spin_unlock(&dev_priv->irq_lock); queue_work(dev_priv->wq, &dev_priv->rps.work); @@ -1005,8 +1038,8 @@ static void hsw_pm_irq_handler(struct drm_i915_private *dev_priv, if (pm_iir & GEN6_PM_RPS_EVENTS) { spin_lock(&dev_priv->irq_lock); dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RPS_EVENTS; - I915_WRITE(GEN6_PMIMR, dev_priv->rps.pm_iir); - /* never want to mask useful interrupts. (also posting read) */ + snb_set_pm_irq(dev_priv, dev_priv->rps.pm_iir); + /* never want to mask useful interrupts. 
*/ WARN_ON(I915_READ_NOTRACE(GEN6_PMIMR) & ~GEN6_PM_RPS_EVENTS); spin_unlock(&dev_priv->irq_lock); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index a8462064714c..8222f2426b47 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -781,5 +781,8 @@ extern void hsw_restore_lcpll(struct drm_i915_private *dev_priv); extern void ilk_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); extern void ilk_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); +extern void snb_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask); +extern void snb_disable_pm_irq(struct drm_i915_private *dev_priv, + uint32_t mask); #endif /* __INTEL_DRV_H__ */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7bc3f1783174..4f0857346bfd 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3450,7 +3450,7 @@ static void gen6_enable_rps_interrupts(struct drm_device *dev) spin_lock_irq(&dev_priv->irq_lock); WARN_ON(dev_priv->rps.pm_iir); - I915_WRITE(GEN6_PMIMR, I915_READ(GEN6_PMIMR) & ~GEN6_PM_RPS_EVENTS); + snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS); I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS); spin_unlock_irq(&dev_priv->irq_lock); /* only unmask PM interrupts we need. Mask all others. */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 2e370804248f..7de29d40d1ad 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1062,10 +1062,8 @@ hsw_vebox_get_irq(struct intel_ring_buffer *ring) spin_lock_irqsave(&dev_priv->irq_lock, flags); if (ring->irq_refcount++ == 0) { - u32 pm_imr = I915_READ(GEN6_PMIMR); I915_WRITE_IMR(ring, ~ring->irq_enable_mask); - I915_WRITE(GEN6_PMIMR, pm_imr & ~ring->irq_enable_mask); - POSTING_READ(GEN6_PMIMR); + snb_enable_pm_irq(dev_priv, ring->irq_enable_mask); } spin_unlock_irqrestore(&dev_priv->irq_lock, flags); @@ -1084,10 +1082,8 @@ hsw_vebox_put_irq(struct intel_ring_buffer *ring) spin_lock_irqsave(&dev_priv->irq_lock, flags); if (--ring->irq_refcount == 0) { - u32 pm_imr = I915_READ(GEN6_PMIMR); I915_WRITE_IMR(ring, ~0); - I915_WRITE(GEN6_PMIMR, pm_imr | ring->irq_enable_mask); - POSTING_READ(GEN6_PMIMR); + snb_disable_pm_irq(dev_priv, ring->irq_enable_mask); } spin_unlock_irqrestore(&dev_priv->irq_lock, flags); } From f52ecbcf8009ef18cda86b30efd837338cd25392 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Tue, 6 Aug 2013 18:57:14 -0300 Subject: [PATCH 46/62] drm/i915: don't update GEN6_PMIMR when it's not needed I did some brief tests and the "new_val = pmimr" condition usually happens a few times after exiting games. Note: This is also prep work to track the GEN6_PMIMR register state in dev_priv->pm_imr. This happens in the next patch. Signed-off-by: Paulo Zanoni Reviewed-by: Rodrigo Vivi [danvet: Add note to explain why we want this, as per the discussion between Chris and Paulo.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index af5c335a69db..efe3fc671e1e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -142,14 +142,18 @@ static void snb_update_pm_irq(struct drm_i915_private *dev_priv, uint32_t interrupt_mask, uint32_t enabled_irq_mask) { - uint32_t pmimr = I915_READ(GEN6_PMIMR); - pmimr &= ~interrupt_mask; - pmimr |= (~enabled_irq_mask & interrupt_mask); + uint32_t pmimr, new_val; assert_spin_locked(&dev_priv->irq_lock); - I915_WRITE(GEN6_PMIMR, pmimr); - POSTING_READ(GEN6_PMIMR); + pmimr = new_val = I915_READ(GEN6_PMIMR); + new_val &= ~interrupt_mask; + new_val |= (~enabled_irq_mask & interrupt_mask); + + if (new_val != pmimr) { + I915_WRITE(GEN6_PMIMR, new_val); + POSTING_READ(GEN6_PMIMR); + } } void snb_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask) From 605cd25b1ffa09a2f86b5c4bd120086dd5ea10a7 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Tue, 6 Aug 2013 18:57:15 -0300 Subject: [PATCH 47/62] drm/i915: add dev_priv->pm_irq_mask Just like irq_mask and gt_irq_mask, use it to track the status of GEN6_PMIMR so we don't need to read it again every time we call snb_update_pm_irq. Signed-off-by: Paulo Zanoni Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_irq.c | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9bf28010c231..f423a94c1b25 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1123,6 +1123,7 @@ typedef struct drm_i915_private { /** Cached value of IMR to avoid reads in updating the bitfield */ u32 irq_mask; u32 gt_irq_mask; + u32 pm_irq_mask; struct work_struct hotplug_work; bool enable_hotplug_processing; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index efe3fc671e1e..8872e1955c45 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -142,16 +142,17 @@ static void snb_update_pm_irq(struct drm_i915_private *dev_priv, uint32_t interrupt_mask, uint32_t enabled_irq_mask) { - uint32_t pmimr, new_val; + uint32_t new_val; assert_spin_locked(&dev_priv->irq_lock); - pmimr = new_val = I915_READ(GEN6_PMIMR); + new_val = dev_priv->pm_irq_mask; new_val &= ~interrupt_mask; new_val |= (~enabled_irq_mask & interrupt_mask); - if (new_val != pmimr) { - I915_WRITE(GEN6_PMIMR, new_val); + if (new_val != dev_priv->pm_irq_mask) { + dev_priv->pm_irq_mask = new_val; + I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask); POSTING_READ(GEN6_PMIMR); } } @@ -2217,8 +2218,9 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev) if (HAS_VEBOX(dev)) pm_irqs |= PM_VEBOX_USER_INTERRUPT; + dev_priv->pm_irq_mask = 0xffffffff; I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR)); - I915_WRITE(GEN6_PMIMR, 0xffffffff); + I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask); I915_WRITE(GEN6_PMIER, pm_irqs); POSTING_READ(GEN6_PMIER); } From 333a820416ccb0e24974b6ebe7d447c0c28c7b76 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Tue, 6 Aug 2013 18:57:16 -0300 Subject: [PATCH 48/62] drm/i915: don't disable/reenable IVB error interrupts when not needed If the error interrupts are already disabled, don't disable and reenable them. 
This is going to be needed when we're in PC8+, where all the interrupts are disabled so we won't risk re-enabling DE_ERR_INT_IVB. v2: Use dev_priv->irq_mask (Chris) Signed-off-by: Paulo Zanoni Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 8872e1955c45..976113af1859 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1373,6 +1373,7 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; u32 de_iir, gt_iir, de_ier, sde_ier = 0; irqreturn_t ret = IRQ_NONE; + bool err_int_reenable = false; atomic_inc(&dev_priv->irq_received); @@ -1401,7 +1402,9 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) * handler. */ if (IS_HASWELL(dev)) { spin_lock(&dev_priv->irq_lock); - ironlake_disable_display_irq(dev_priv, DE_ERR_INT_IVB); + err_int_reenable = ~dev_priv->irq_mask & DE_ERR_INT_IVB; + if (err_int_reenable) + ironlake_disable_display_irq(dev_priv, DE_ERR_INT_IVB); spin_unlock(&dev_priv->irq_lock); } @@ -1437,7 +1440,7 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) } } - if (IS_HASWELL(dev)) { + if (err_int_reenable) { spin_lock(&dev_priv->irq_lock); if (ivb_can_enable_err_int(dev)) ironlake_enable_display_irq(dev_priv, DE_ERR_INT_IVB); From 60611c137641af41895828cfc74f5be64ed69b49 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Thu, 15 Aug 2013 11:50:01 -0300 Subject: [PATCH 49/62] drm/i915: don't queue PM events we won't process On SNB/IVB/VLV we only call gen6_rps_irq_handler if one of the IIR bits set is part of GEN6_PM_RPS_EVENTS, but at gen6_rps_irq_handler we add all the enabled IIR bits to the work queue, not only the ones that are part of GEN6_PM_RPS_EVENTS. But then gen6_pm_rps_work only processes GEN6_PM_RPS_EVENTS, so it's useless to add anything that's not GEN6_PM_RPS_EVENTS to the work queue. As a bonus, gen6_rps_irq_handler looks more similar to hsw_pm_irq_handler, so we may be able to merge them in the future. v2: - Add a WARN in case we queued something we're not going to process. Signed-off-by: Paulo Zanoni Reviewed-by: Ben Widawsky (v1) Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 976113af1859..c10d2f1af0be 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -789,6 +789,9 @@ static void gen6_pm_rps_work(struct work_struct *work) snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS); spin_unlock_irq(&dev_priv->irq_lock); + /* Make sure we didn't queue anything we're not going to process. 
*/ + WARN_ON(pm_iir & ~GEN6_PM_RPS_EVENTS); + if ((pm_iir & GEN6_PM_RPS_EVENTS) == 0) return; @@ -959,7 +962,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, */ spin_lock(&dev_priv->irq_lock); - dev_priv->rps.pm_iir |= pm_iir; + dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RPS_EVENTS; snb_set_pm_irq(dev_priv, dev_priv->rps.pm_iir); spin_unlock(&dev_priv->irq_lock); @@ -1128,7 +1131,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg) if (pipe_stats[0] & PIPE_GMBUS_INTERRUPT_STATUS) gmbus_irq_handler(dev); - if (pm_iir & GEN6_PM_RPS_EVENTS) + if (pm_iir) gen6_rps_irq_handler(dev_priv, pm_iir); I915_WRITE(GTIIR, gt_iir); @@ -1433,7 +1436,7 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) if (pm_iir) { if (IS_HASWELL(dev)) hsw_pm_irq_handler(dev_priv, pm_iir); - else if (pm_iir & GEN6_PM_RPS_EVENTS) + else gen6_rps_irq_handler(dev_priv, pm_iir); I915_WRITE(GEN6_PMIIR, pm_iir); ret = IRQ_HANDLED; From 4d3b3d5fd7d42a522a6c444388826bb23264db9f Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 9 Aug 2013 17:04:36 -0300 Subject: [PATCH 50/62] drm/i915: fix how we mask PMIMR when adding work to the queue It seems we've been doing this ever since we started processing the RPS events on a work queue, on commit "drm/i915: move gen6 rps handling to workqueue", 4912d04193733a825216b926ffd290fada88ab07. The problem is: when we add work to the queue, instead of just masking the bits we queued and leaving all the others on their current state, we mask the bits we queued and unmask all the others. This basically means we'll be unmasking a bunch of interrupts we're not going to process. And if you look at gen6_pm_rps_work, we unmask back only GEN6_PM_RPS_EVENTS, which means the bits we unmasked when adding work to the queue will remain unmasked after we process the queue. Notice that even though we unmask those unrelated interrupts, we never enable them on IER, so they don't fire our interrupt handler, they just stay there on IIR waiting to be cleared when something else triggers the interrupt handler. So this patch does what seems to make more sense: mask only the bits we add to the queue, without unmasking anything else, and so we'll unmask them after we process the queue. As a side effect we also have to remove that WARN, because it is not only making sure we don't mask useful interrupts, it is also making sure we do unmask useless interrupts! That piece of code should not be responsible for knowing which bits should be unmasked, so just don't assert anything, and trust that snb_disable_pm_irq should be doing the right thing. With i915.enable_pc8=1 I was getting occasional "GEN6_PMIIR is not 0" error messages due to the fact that we unmask those unrelated interrupts but don't enable them. Note: if bugs start bisecting to this patch, then it probably means someone was relying on the fact that we unmask everything by accident, then we should fix gen5_gt_irq_postinstall or whoever needs the accidentally unmasked interrupts. Or maybe I was just wrong and we need to revert this patch :) Note: This started to be a more real issue with the addition of the VEBOX support since now we do enable more than just the minimal set of RPS interrupts in the IER register. Which means after the first rps interrupt has happened we will never mask the VEBOX user interrupts again and so will blow through cpu time needlessly when running video workloads. Signed-off-by: Paulo Zanoni Reviewed-by: Ben Widawsky [danvet: Add note that this started to matter with VEBOX much more.]
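To make the before/after concrete, here is a stand-alone sketch (editorial illustration with made-up bit positions, not driver code) of what happens to PMIMR when a single RPS event is queued while an unrelated bit, say the VEBOX user interrupt, is supposed to stay masked:

        #include <stdint.h>
        #include <stdio.h>

        #define RPS_BIT         (1u << 5)       /* stands in for one of GEN6_PM_RPS_EVENTS */
        #define VEBOX_BIT       (1u << 10)      /* stands in for a bit that must stay masked */

        int main(void)
        {
                uint32_t pmimr = ~RPS_BIT;      /* RPS unmasked, everything else masked */
                uint32_t pm_iir = RPS_BIT;      /* the event we just queued */

                /* Old behaviour: PMIMR ended up holding exactly the queued bits, so
                 * every other bit was unmasked as a side effect. */
                uint32_t old_val = pm_iir;
                /* New behaviour: only set (mask) the queued bits, leave the rest alone. */
                uint32_t new_val = pmimr | pm_iir;

                printf("old PMIMR 0x%08x: VEBOX bit %s\n", old_val,
                       old_val & VEBOX_BIT ? "still masked" : "accidentally unmasked");
                printf("new PMIMR 0x%08x: VEBOX bit %s\n", new_val,
                       new_val & VEBOX_BIT ? "still masked" : "accidentally unmasked");
                return 0;
        }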
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c10d2f1af0be..e0c6f7d6189d 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -167,11 +167,6 @@ void snb_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask) snb_update_pm_irq(dev_priv, mask, 0); } -static void snb_set_pm_irq(struct drm_i915_private *dev_priv, uint32_t val) -{ - snb_update_pm_irq(dev_priv, 0xffffffff, ~val); -} - static bool ivb_can_enable_err_int(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -963,7 +958,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, spin_lock(&dev_priv->irq_lock); dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RPS_EVENTS; - snb_set_pm_irq(dev_priv, dev_priv->rps.pm_iir); + snb_disable_pm_irq(dev_priv, pm_iir & GEN6_PM_RPS_EVENTS); spin_unlock(&dev_priv->irq_lock); queue_work(dev_priv->wq, &dev_priv->rps.work); @@ -1046,9 +1041,7 @@ static void hsw_pm_irq_handler(struct drm_i915_private *dev_priv, if (pm_iir & GEN6_PM_RPS_EVENTS) { spin_lock(&dev_priv->irq_lock); dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RPS_EVENTS; - snb_set_pm_irq(dev_priv, dev_priv->rps.pm_iir); - /* never want to mask useful interrupts. */ - WARN_ON(I915_READ_NOTRACE(GEN6_PMIMR) & ~GEN6_PM_RPS_EVENTS); + snb_disable_pm_irq(dev_priv, pm_iir & GEN6_PM_RPS_EVENTS); spin_unlock(&dev_priv->irq_lock); queue_work(dev_priv->wq, &dev_priv->rps.work); From 1403c0d4d46f2eed2ab13b89561c853988ad7513 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Thu, 15 Aug 2013 11:51:32 -0300 Subject: [PATCH 51/62] drm/i915: merge HSW and SNB PM irq handlers Because hsw_pm_irq_handler does exactly what gen6_rps_irq_handler does and also processes the 2 additional VEBOX bits. So merge those functions and wrap the VEBOX bits in a HAS_VEBOX check. This check isn't really necessary since the bits are reserved on SNB/IVB/VLV, but it serves as good documentation of who uses them. v2: - Change IS_HASWELL check to HAS_VEBOX Signed-off-by: Paulo Zanoni Reviewed-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 50 ++++++++------------------------- 1 file changed, 12 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index e0c6f7d6189d..caf83da17bb0 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -942,28 +942,6 @@ static void snb_gt_irq_handler(struct drm_device *dev, ivybridge_parity_error_irq_handler(dev); } -/* Legacy way of handling PM interrupts */ -static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, - u32 pm_iir) -{ - /* - * IIR bits should never already be set because IMR should - * prevent an interrupt from being shown in IIR. The warning - * displays a case where we've unsafely cleared - * dev_priv->rps.pm_iir. Although missing an interrupt of the same - * type is not a problem, it displays a problem in the logic. - * - * The mask bit in IMR is cleared by dev_priv->rps.work. 
- */ - - spin_lock(&dev_priv->irq_lock); - dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RPS_EVENTS; - snb_disable_pm_irq(dev_priv, pm_iir & GEN6_PM_RPS_EVENTS); - spin_unlock(&dev_priv->irq_lock); - - queue_work(dev_priv->wq, &dev_priv->rps.work); -} - #define HPD_STORM_DETECT_PERIOD 1000 #define HPD_STORM_THRESHOLD 5 @@ -1030,13 +1008,10 @@ static void dp_aux_irq_handler(struct drm_device *dev) wake_up_all(&dev_priv->gmbus_wait_queue); } -/* Unlike gen6_rps_irq_handler() from which this function is originally derived, - * we must be able to deal with other PM interrupts. This is complicated because - * of the way in which we use the masks to defer the RPS work (which for - * posterity is necessary because of forcewake). - */ -static void hsw_pm_irq_handler(struct drm_i915_private *dev_priv, - u32 pm_iir) +/* The RPS events need forcewake, so we add them to a work queue and mask their + * IMR bits until the work is done. Other interrupts can be processed without + * the work queue. */ +static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) { if (pm_iir & GEN6_PM_RPS_EVENTS) { spin_lock(&dev_priv->irq_lock); @@ -1047,12 +1022,14 @@ static void hsw_pm_irq_handler(struct drm_i915_private *dev_priv, queue_work(dev_priv->wq, &dev_priv->rps.work); } - if (pm_iir & PM_VEBOX_USER_INTERRUPT) - notify_ring(dev_priv->dev, &dev_priv->ring[VECS]); + if (HAS_VEBOX(dev_priv->dev)) { + if (pm_iir & PM_VEBOX_USER_INTERRUPT) + notify_ring(dev_priv->dev, &dev_priv->ring[VECS]); - if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) { - DRM_ERROR("VEBOX CS error interrupt 0x%08x\n", pm_iir); - i915_handle_error(dev_priv->dev, false); + if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) { + DRM_ERROR("VEBOX CS error interrupt 0x%08x\n", pm_iir); + i915_handle_error(dev_priv->dev, false); + } } } @@ -1427,10 +1404,7 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) if (INTEL_INFO(dev)->gen >= 6) { u32 pm_iir = I915_READ(GEN6_PMIIR); if (pm_iir) { - if (IS_HASWELL(dev)) - hsw_pm_irq_handler(dev_priv, pm_iir); - else - gen6_rps_irq_handler(dev_priv, pm_iir); + gen6_rps_irq_handler(dev_priv, pm_iir); I915_WRITE(GEN6_PMIIR, pm_iir); ret = IRQ_HANDLED; } From 5032d871f7d300aee10c309ea004eb4f851553fe Mon Sep 17 00:00:00 2001 From: Rafael Barbalho Date: Wed, 21 Aug 2013 17:10:51 +0100 Subject: [PATCH 52/62] drm/i915: Cleaning up the relocate entry function As the relocate entry function was getting a bit too big, I've moved the code that used either the CPU or the GTT for the relocation into two separate functions. 
Signed-off-by: Rafael Barbalho Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 88 +++++++++++++--------- 1 file changed, 54 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 7dcf78cf6781..792c52a235ee 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -171,6 +171,56 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) obj->cache_level != I915_CACHE_NONE); } +static int +relocate_entry_cpu(struct drm_i915_gem_object *obj, + struct drm_i915_gem_relocation_entry *reloc) +{ + uint32_t page_offset = offset_in_page(reloc->offset); + char *vaddr; + int ret = -EINVAL; + + ret = i915_gem_object_set_to_cpu_domain(obj, 1); + if (ret) + return ret; + + vaddr = kmap_atomic(i915_gem_object_get_page(obj, + reloc->offset >> PAGE_SHIFT)); + *(uint32_t *)(vaddr + page_offset) = reloc->delta; + kunmap_atomic(vaddr); + + return 0; +} + +static int +relocate_entry_gtt(struct drm_i915_gem_object *obj, + struct drm_i915_gem_relocation_entry *reloc) +{ + struct drm_device *dev = obj->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t __iomem *reloc_entry; + void __iomem *reloc_page; + int ret = -EINVAL; + + ret = i915_gem_object_set_to_gtt_domain(obj, true); + if (ret) + return ret; + + ret = i915_gem_object_put_fence(obj); + if (ret) + return ret; + + /* Map the page containing the relocation we're going to perform. */ + reloc->offset += i915_gem_obj_ggtt_offset(obj); + reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable, + reloc->offset & PAGE_MASK); + reloc_entry = (uint32_t __iomem *) + (reloc_page + offset_in_page(reloc->offset)); + iowrite32(reloc->delta, reloc_entry); + io_mapping_unmap_atomic(reloc_page); + + return 0; +} + static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct eb_objects *eb, @@ -255,40 +305,10 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, return -EFAULT; reloc->delta += target_offset; - if (use_cpu_reloc(obj)) { - uint32_t page_offset = offset_in_page(reloc->offset); - char *vaddr; - - ret = i915_gem_object_set_to_cpu_domain(obj, 1); - if (ret) - return ret; - - vaddr = kmap_atomic(i915_gem_object_get_page(obj, - reloc->offset >> PAGE_SHIFT)); - *(uint32_t *)(vaddr + page_offset) = reloc->delta; - kunmap_atomic(vaddr); - } else { - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t __iomem *reloc_entry; - void __iomem *reloc_page; - - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) - return ret; - - ret = i915_gem_object_put_fence(obj); - if (ret) - return ret; - - /* Map the page containing the relocation we're going to perform. 
*/ - reloc->offset += i915_gem_obj_ggtt_offset(obj); - reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable, - reloc->offset & PAGE_MASK); - reloc_entry = (uint32_t __iomem *) - (reloc_page + offset_in_page(reloc->offset)); - iowrite32(reloc->delta, reloc_entry); - io_mapping_unmap_atomic(reloc_page); - } + if (use_cpu_reloc(obj)) + ret = relocate_entry_cpu(obj, reloc); + else + ret = relocate_entry_gtt(obj, reloc); /* and update the user's relocation entry */ reloc->presumed_offset = target_offset; From 3414caf63421762e57b26aa999e5187b42ee1606 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 21 Aug 2013 08:08:55 -0700 Subject: [PATCH 53/62] drm/i915: drop WaMbcDriverBootEnable workaround MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turns out the BIOS will do this for us as needed, and if we try to do it again we risk hangs or other bad behavior. Note that this seems to break libva on ChromeOS after resumes (but strangely _not_ after booting up). This essentially reverts commit b4ae3f22d238617ca11610b29fde16cf8c0bc6e0 Author: Jesse Barnes Date: Thu Jun 14 11:04:48 2012 -0700 drm/i915: load boot context at driver init time and commit b3bf076697a68a8577f4a5f7407de0bb2b3b56ac Author: Paulo Zanoni Date: Tue Nov 20 13:27:44 2012 -0200 drm/i915: implement WaMbcDriverBootEnable on Haswell Signed-off-by: Jesse Barnes Reported-and-Tested-by: Stéphane Marchesin [danvet: Add note about impact and regression citation.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 4f0857346bfd..cbab95dce352 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4864,10 +4864,6 @@ static void gen6_init_clock_gating(struct drm_device *dev) ILK_DPARBUNIT_CLOCK_GATE_ENABLE | ILK_DPFDUNIT_CLOCK_GATE_ENABLE); - /* WaMbcDriverBootEnable:snb */ - I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) | - GEN6_MBCTL_ENABLE_BOOT_FETCH); - g4x_disable_trickle_feed(dev); /* The default value should be 0x200 according to docs, but the two @@ -4963,10 +4959,6 @@ static void haswell_init_clock_gating(struct drm_device *dev) I915_WRITE(CACHE_MODE_1, _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); - /* WaMbcDriverBootEnable:hsw */ - I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) | - GEN6_MBCTL_ENABLE_BOOT_FETCH); - /* WaSwitchSolVfFArbitrationPriority:hsw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); @@ -5050,10 +5042,6 @@ static void ivybridge_init_clock_gating(struct drm_device *dev) g4x_disable_trickle_feed(dev); - /* WaMbcDriverBootEnable:ivb */ - I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) | - GEN6_MBCTL_ENABLE_BOOT_FETCH); - /* WaVSRefCountFullforceMissDisable:ivb */ gen7_setup_fixed_func_scheduler(dev_priv); @@ -5113,11 +5101,6 @@ static void valleyview_init_clock_gating(struct drm_device *dev) I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); - /* WaMbcDriverBootEnable:vlv */ - I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) | - GEN6_MBCTL_ENABLE_BOOT_FETCH); - - /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock * gating disable must be set. 
Failure to set it results in * flickering pixels due to Z write ordering failures after From 215733fadb87709e91b3a622d786865292c9ab11 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Mon, 19 Aug 2013 13:18:07 -0300 Subject: [PATCH 54/62] drm/i915: grab force_wake when restoring LCPLL If LCPLL is disabled, there's a chance we might be in package C8 state or deeper, and we'll get a hard hang when restoring LCPLL (also, a red led lights up on my motherboard). So grab the force_wake, which will get us out of RC6 and, as a consequence, out of PC8+ (since we need RC6 to get into PC8+). Note: Discussions with hw designers are still ongoing what exactly goes boom here. But I think we can go ahead and just merge this little hack for now until it's clear what we actually need. Signed-off-by: Paulo Zanoni [danvet: Add small note about the current state of the discussion around this hack.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 7a40427823c7..a2c8cb360ae8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6033,6 +6033,10 @@ void hsw_restore_lcpll(struct drm_i915_private *dev_priv) LCPLL_POWER_DOWN_ALLOW)) == LCPLL_PLL_LOCK) return; + /* Make sure we're not on PC8 state before disabling PC8, otherwise + * we'll hang the machine! */ + dev_priv->uncore.funcs.force_wake_get(dev_priv); + if (val & LCPLL_POWER_DOWN_ALLOW) { val &= ~LCPLL_POWER_DOWN_ALLOW; I915_WRITE(LCPLL_CTL, val); @@ -6060,6 +6064,8 @@ void hsw_restore_lcpll(struct drm_i915_private *dev_priv) LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) DRM_ERROR("Switching back to LCPLL failed\n"); } + + dev_priv->uncore.funcs.force_wake_put(dev_priv); } static void haswell_modeset_global_resources(struct drm_device *dev) From bd633a7c1ca0663ba10426a0a6aeda0257cbe804 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Mon, 19 Aug 2013 13:18:08 -0300 Subject: [PATCH 55/62] drm/i915: fix SDEIMR assertion when disabling LCPLL This was causing WARNs in one machine, so instead of trying to guess exactly which hotplug bits should exist, just do the test on the non-HPD bits. We don't care about the state of the hotplug bits, we just care about the others, that need to be 1. 
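For clarity, a tiny stand-alone illustration of the check this switches to (editorial sketch; the mask value below is made up, the driver uses SDE_HOTPLUG_MASK_CPT): OR-ing the hotplug bits into the read value turns them into don't-cares, so comparing against all-ones asserts exactly that every non-HPD SDEIMR bit is still set, i.e. masked.

        #include <assert.h>
        #include <stdint.h>

        #define HPD_MASK 0x00ff0000u    /* placeholder for SDE_HOTPLUG_MASK_CPT */

        static int non_hpd_bits_all_masked(uint32_t sdeimr)
        {
                return (sdeimr | HPD_MASK) == 0xffffffffu;
        }

        int main(void)
        {
                assert(non_hpd_bits_all_masked(0xffffffffu));   /* everything masked */
                assert(non_hpd_bits_all_masked(~HPD_MASK));     /* only HPD unmasked: still fine */
                assert(!non_hpd_bits_all_masked(0xfffffffeu));  /* a non-HPD bit unmasked: caught */
                return 0;
        }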
Signed-off-by: Paulo Zanoni Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a2c8cb360ae8..fc92773ef552 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5932,11 +5932,7 @@ static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv) struct intel_ddi_plls *plls = &dev_priv->ddi_plls; struct intel_crtc *crtc; unsigned long irqflags; - uint32_t val, pch_hpd_mask; - - pch_hpd_mask = SDE_PORTB_HOTPLUG_CPT | SDE_PORTC_HOTPLUG_CPT; - if (!(dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE)) - pch_hpd_mask |= SDE_PORTD_HOTPLUG_CPT | SDE_CRT_HOTPLUG_CPT; + uint32_t val; list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) WARN(crtc->base.enabled, "CRTC for pipe %c enabled\n", @@ -5962,7 +5958,7 @@ static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv) WARN((val & ~DE_PCH_EVENT_IVB) != val, "Unexpected DEIMR bits enabled: 0x%x\n", val); val = I915_READ(SDEIMR); - WARN((val & ~pch_hpd_mask) != val, + WARN((val | SDE_HOTPLUG_MASK_CPT) != 0xffffffff, "Unexpected SDEIMR bits enabled: 0x%x\n", val); spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } From c67a470b1db781c54be07a87217cff35a91f564e Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Mon, 19 Aug 2013 13:18:09 -0300 Subject: [PATCH 56/62] drm/i915: allow package C8+ states on Haswell (disabled) This patch allows PC8+ states on Haswell. These states can only be reached when all the display outputs are disabled, and they allow some more power savings. The fact that the graphics device is allowing PC8+ doesn't mean that the machine will actually enter PC8+: all the other devices also need to allow PC8+. For now this option is disabled by default. You need i915.enable_pc8=1 if you want it. This patch adds a big comment inside i915_drv.h explaining how it works and how it tracks things. Read it. v2: (this is not really v2, many previous versions were already sent, but they had different names) - Use the new functions to enable/disable GTIMR and GEN6_PMIMR - Rename almost all variables and functions to names suggested by Chris - More WARNs on the IRQ handling code - Also disable PC8 when there's GPU work to do (thanks to Ben for the help on this), so apps can run faster - Enable PC8 on a delayed work function that is delayed for 5 seconds. This makes sure we only enable PC8+ if we're really idle - Make sure we're not in PC8+ when suspending v3: - WARN if IRQs are disabled on __wait_seqno - Replace some DRM_ERRORs with WARNs - Fix calls to restore GT and PM interrupts - Use intel_mark_busy instead of intel_ring_advance to disable PC8 v4: - Use the force_wake, Luke! 
v5: - Remove the "IIR is not zero" WARNs - Move the force_wake chunk to its own patch - Only restore what's missing from RC6, not everything Signed-off-by: Paulo Zanoni Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 10 ++ drivers/gpu/drm/i915/i915_drv.c | 11 ++ drivers/gpu/drm/i915/i915_drv.h | 72 ++++++++++++ drivers/gpu/drm/i915/i915_gem.c | 2 + drivers/gpu/drm/i915/i915_irq.c | 101 ++++++++++++++++ drivers/gpu/drm/i915/intel_display.c | 170 ++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_dp.c | 3 + drivers/gpu/drm/i915/intel_drv.h | 8 ++ drivers/gpu/drm/i915/intel_i2c.c | 2 + drivers/gpu/drm/i915/intel_pm.c | 13 +- 10 files changed, 390 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 5a051eaab9ef..09e8ef910ec5 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1486,6 +1486,14 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) mutex_init(&dev_priv->rps.hw_lock); mutex_init(&dev_priv->modeset_restore_lock); + mutex_init(&dev_priv->pc8.lock); + dev_priv->pc8.requirements_met = false; + dev_priv->pc8.gpu_idle = false; + dev_priv->pc8.irqs_disabled = false; + dev_priv->pc8.enabled = false; + dev_priv->pc8.disable_count = 2; /* requirements_met + gpu_idle */ + INIT_DELAYED_WORK(&dev_priv->pc8.enable_work, hsw_enable_pc8_work); + i915_dump_device_info(dev_priv); /* Not all pre-production machines fall into this category, only the @@ -1740,6 +1748,8 @@ int i915_driver_unload(struct drm_device *dev) cancel_work_sync(&dev_priv->gpu_error.work); i915_destroy_error_state(dev); + cancel_delayed_work_sync(&dev_priv->pc8.enable_work); + if (dev->pdev->msi_enabled) pci_disable_msi(dev->pdev); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index fd9fb2c25691..84d48b82e3f1 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -141,6 +141,10 @@ module_param_named(fastboot, i915_fastboot, bool, 0600); MODULE_PARM_DESC(fastboot, "Try to skip unnecessary mode sets at boot time " "(default: false)"); +int i915_enable_pc8 __read_mostly = 0; +module_param_named(enable_pc8, i915_enable_pc8, int, 0600); +MODULE_PARM_DESC(enable_pc8, "Enable support for low power package C states (PC8+) (default: false)"); + bool i915_prefault_disable __read_mostly; module_param_named(prefault_disable, i915_prefault_disable, bool, 0600); MODULE_PARM_DESC(prefault_disable, @@ -557,6 +561,9 @@ static int i915_drm_freeze(struct drm_device *dev) dev_priv->modeset_restore = MODESET_SUSPENDED; mutex_unlock(&dev_priv->modeset_restore_lock); + /* We do a lot of poking in a lot of registers, make sure they work + * properly. */ + hsw_disable_package_c8(dev_priv); intel_set_power_well(dev, true); drm_kms_helper_poll_disable(dev); @@ -713,6 +720,10 @@ static int __i915_drm_thaw(struct drm_device *dev) schedule_work(&dev_priv->console_resume_work); } + /* Undo what we did at i915_drm_freeze so the refcount goes back to the + * expected level. 
*/ + hsw_enable_package_c8(dev_priv); + mutex_lock(&dev_priv->modeset_restore_lock); dev_priv->modeset_restore = MODESET_DONE; mutex_unlock(&dev_priv->modeset_restore_lock); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f423a94c1b25..627c8216db1f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1076,6 +1076,75 @@ struct intel_wm_level { uint32_t fbc_val; }; +/* + * This struct tracks the state needed for the Package C8+ feature. + * + * Package states C8 and deeper are really deep PC states that can only be + * reached when all the devices on the system allow it, so even if the graphics + * device allows PC8+, it doesn't mean the system will actually get to these + * states. + * + * Our driver only allows PC8+ when all the outputs are disabled, the power well + * is disabled and the GPU is idle. When these conditions are met, we manually + * do the other conditions: disable the interrupts, clocks and switch LCPLL + * refclk to Fclk. + * + * When we really reach PC8 or deeper states (not just when we allow it) we lose + * the state of some registers, so when we come back from PC8+ we need to + * restore this state. We don't get into PC8+ if we're not in RC6, so we don't + * need to take care of the registers kept by RC6. + * + * The interrupt disabling is part of the requirements. We can only leave the + * PCH HPD interrupts enabled. If we're in PC8+ and we get another interrupt we + * can lock the machine. + * + * Ideally every piece of our code that needs PC8+ disabled would call + * hsw_disable_package_c8, which would increment disable_count and prevent the + * system from reaching PC8+. But we don't have a symmetric way to do this for + * everything, so we have the requirements_met and gpu_idle variables. When we + * switch requirements_met or gpu_idle to true we decrease disable_count, and + * increase it in the opposite case. The requirements_met variable is true when + * all the CRTCs, encoders and the power well are disabled. The gpu_idle + * variable is true when the GPU is idle. + * + * In addition to everything, we only actually enable PC8+ if disable_count + * stays at zero for at least some seconds. This is implemented with the + * enable_work variable. We do this so we don't enable/disable PC8 dozens of + * consecutive times when all screens are disabled and some background app + * queries the state of our connectors, or we have some application constantly + * waking up to use the GPU. Only after the enable_work function actually + * enables PC8+ the "enable" variable will become true, which means that it can + * be false even if disable_count is 0. + * + * The irqs_disabled variable becomes true exactly after we disable the IRQs and + * goes back to false exactly before we reenable the IRQs. We use this variable + * to check if someone is trying to enable/disable IRQs while they're supposed + * to be disabled. This shouldn't happen and we'll print some error messages in + * case it happens, but if it actually happens we'll also update the variables + * inside struct regsave so when we restore the IRQs they will contain the + * latest expected values. + * + * For more, read "Display Sequences for Package C8" on our documentation. + */ +struct i915_package_c8 { + bool requirements_met; + bool gpu_idle; + bool irqs_disabled; + /* Only true after the delayed work task actually enables it. 
*/ + bool enabled; + int disable_count; + struct mutex lock; + struct delayed_work enable_work; + + struct { + uint32_t deimr; + uint32_t sdeimr; + uint32_t gtimr; + uint32_t gtier; + uint32_t gen6_pmimr; + } regsave; +}; + typedef struct drm_i915_private { struct drm_device *dev; struct kmem_cache *slab; @@ -1260,6 +1329,8 @@ typedef struct drm_i915_private { uint16_t cur_latency[5]; } wm; + struct i915_package_c8 pc8; + /* Old dri1 support infrastructure, beware the dragons ya fools entering * here! */ struct i915_dri1_state dri1; @@ -1635,6 +1706,7 @@ extern unsigned int i915_preliminary_hw_support __read_mostly; extern int i915_disable_power_well __read_mostly; extern int i915_enable_ips __read_mostly; extern bool i915_fastboot __read_mostly; +extern int i915_enable_pc8 __read_mostly; extern bool i915_prefault_disable __read_mostly; extern int i915_suspend(struct drm_device *dev, pm_message_t state); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 449575b85b31..23c42567631e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1005,6 +1005,8 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, bool wait_forever = true; int ret; + WARN(dev_priv->pc8.irqs_disabled, "IRQs disabled\n"); + if (i915_seqno_passed(ring->get_seqno(ring, true), seqno)) return 0; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index caf83da17bb0..a03b445ceb5f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -85,6 +85,12 @@ ironlake_enable_display_irq(drm_i915_private_t *dev_priv, u32 mask) { assert_spin_locked(&dev_priv->irq_lock); + if (dev_priv->pc8.irqs_disabled) { + WARN(1, "IRQs disabled\n"); + dev_priv->pc8.regsave.deimr &= ~mask; + return; + } + if ((dev_priv->irq_mask & mask) != 0) { dev_priv->irq_mask &= ~mask; I915_WRITE(DEIMR, dev_priv->irq_mask); @@ -97,6 +103,12 @@ ironlake_disable_display_irq(drm_i915_private_t *dev_priv, u32 mask) { assert_spin_locked(&dev_priv->irq_lock); + if (dev_priv->pc8.irqs_disabled) { + WARN(1, "IRQs disabled\n"); + dev_priv->pc8.regsave.deimr |= mask; + return; + } + if ((dev_priv->irq_mask & mask) != mask) { dev_priv->irq_mask |= mask; I915_WRITE(DEIMR, dev_priv->irq_mask); @@ -116,6 +128,14 @@ static void ilk_update_gt_irq(struct drm_i915_private *dev_priv, { assert_spin_locked(&dev_priv->irq_lock); + if (dev_priv->pc8.irqs_disabled) { + WARN(1, "IRQs disabled\n"); + dev_priv->pc8.regsave.gtimr &= ~interrupt_mask; + dev_priv->pc8.regsave.gtimr |= (~enabled_irq_mask & + interrupt_mask); + return; + } + dev_priv->gt_irq_mask &= ~interrupt_mask; dev_priv->gt_irq_mask |= (~enabled_irq_mask & interrupt_mask); I915_WRITE(GTIMR, dev_priv->gt_irq_mask); @@ -146,6 +166,14 @@ static void snb_update_pm_irq(struct drm_i915_private *dev_priv, assert_spin_locked(&dev_priv->irq_lock); + if (dev_priv->pc8.irqs_disabled) { + WARN(1, "IRQs disabled\n"); + dev_priv->pc8.regsave.gen6_pmimr &= ~interrupt_mask; + dev_priv->pc8.regsave.gen6_pmimr |= (~enabled_irq_mask & + interrupt_mask); + return; + } + new_val = dev_priv->pm_irq_mask; new_val &= ~interrupt_mask; new_val |= (~enabled_irq_mask & interrupt_mask); @@ -257,6 +285,15 @@ static void ibx_display_interrupt_update(struct drm_i915_private *dev_priv, assert_spin_locked(&dev_priv->irq_lock); + if (dev_priv->pc8.irqs_disabled && + (interrupt_mask & SDE_HOTPLUG_MASK_CPT)) { + WARN(1, "IRQs disabled\n"); + dev_priv->pc8.regsave.sdeimr &= ~interrupt_mask; + dev_priv->pc8.regsave.sdeimr |= 
(~enabled_irq_mask & + interrupt_mask); + return; + } + I915_WRITE(SDEIMR, sdeimr); POSTING_READ(SDEIMR); } @@ -3113,3 +3150,67 @@ void intel_hpd_init(struct drm_device *dev) dev_priv->display.hpd_irq_setup(dev); spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } + +/* Disable interrupts so we can allow Package C8+. */ +void hsw_pc8_disable_interrupts(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long irqflags; + + spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + + dev_priv->pc8.regsave.deimr = I915_READ(DEIMR); + dev_priv->pc8.regsave.sdeimr = I915_READ(SDEIMR); + dev_priv->pc8.regsave.gtimr = I915_READ(GTIMR); + dev_priv->pc8.regsave.gtier = I915_READ(GTIER); + dev_priv->pc8.regsave.gen6_pmimr = I915_READ(GEN6_PMIMR); + + ironlake_disable_display_irq(dev_priv, ~DE_PCH_EVENT_IVB); + ibx_disable_display_interrupt(dev_priv, ~SDE_HOTPLUG_MASK_CPT); + ilk_disable_gt_irq(dev_priv, 0xffffffff); + snb_disable_pm_irq(dev_priv, 0xffffffff); + + dev_priv->pc8.irqs_disabled = true; + + spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); +} + +/* Restore interrupts so we can recover from Package C8+. */ +void hsw_pc8_restore_interrupts(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long irqflags; + uint32_t val, expected; + + spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + + val = I915_READ(DEIMR); + expected = ~DE_PCH_EVENT_IVB; + WARN(val != expected, "DEIMR is 0x%08x, not 0x%08x\n", val, expected); + + val = I915_READ(SDEIMR) & ~SDE_HOTPLUG_MASK_CPT; + expected = ~SDE_HOTPLUG_MASK_CPT; + WARN(val != expected, "SDEIMR non-HPD bits are 0x%08x, not 0x%08x\n", + val, expected); + + val = I915_READ(GTIMR); + expected = 0xffffffff; + WARN(val != expected, "GTIMR is 0x%08x, not 0x%08x\n", val, expected); + + val = I915_READ(GEN6_PMIMR); + expected = 0xffffffff; + WARN(val != expected, "GEN6_PMIMR is 0x%08x, not 0x%08x\n", val, + expected); + + dev_priv->pc8.irqs_disabled = false; + + ironlake_enable_display_irq(dev_priv, ~dev_priv->pc8.regsave.deimr); + ibx_enable_display_interrupt(dev_priv, + ~dev_priv->pc8.regsave.sdeimr & + ~SDE_HOTPLUG_MASK_CPT); + ilk_enable_gt_irq(dev_priv, ~dev_priv->pc8.regsave.gtimr); + snb_enable_pm_irq(dev_priv, ~dev_priv->pc8.regsave.gen6_pmimr); + I915_WRITE(GTIER, dev_priv->pc8.regsave.gtier); + + spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); +} diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index fc92773ef552..1fad9de3d810 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6064,6 +6064,166 @@ void hsw_restore_lcpll(struct drm_i915_private *dev_priv) dev_priv->uncore.funcs.force_wake_put(dev_priv); } +void hsw_enable_pc8_work(struct work_struct *__work) +{ + struct drm_i915_private *dev_priv = + container_of(to_delayed_work(__work), struct drm_i915_private, + pc8.enable_work); + struct drm_device *dev = dev_priv->dev; + uint32_t val; + + if (dev_priv->pc8.enabled) + return; + + DRM_DEBUG_KMS("Enabling package C8+\n"); + + dev_priv->pc8.enabled = true; + + if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) { + val = I915_READ(SOUTH_DSPCLK_GATE_D); + val &= ~PCH_LP_PARTITION_LEVEL_DISABLE; + I915_WRITE(SOUTH_DSPCLK_GATE_D, val); + } + + lpt_disable_clkout_dp(dev); + hsw_pc8_disable_interrupts(dev); + hsw_disable_lcpll(dev_priv, true, true); +} + +static void __hsw_enable_package_c8(struct drm_i915_private *dev_priv) +{ + 
WARN_ON(!mutex_is_locked(&dev_priv->pc8.lock)); + WARN(dev_priv->pc8.disable_count < 1, + "pc8.disable_count: %d\n", dev_priv->pc8.disable_count); + + dev_priv->pc8.disable_count--; + if (dev_priv->pc8.disable_count != 0) + return; + + schedule_delayed_work(&dev_priv->pc8.enable_work, + msecs_to_jiffies(5 * 1000)); +} + +static void __hsw_disable_package_c8(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = dev_priv->dev; + uint32_t val; + + WARN_ON(!mutex_is_locked(&dev_priv->pc8.lock)); + WARN(dev_priv->pc8.disable_count < 0, + "pc8.disable_count: %d\n", dev_priv->pc8.disable_count); + + dev_priv->pc8.disable_count++; + if (dev_priv->pc8.disable_count != 1) + return; + + cancel_delayed_work_sync(&dev_priv->pc8.enable_work); + if (!dev_priv->pc8.enabled) + return; + + DRM_DEBUG_KMS("Disabling package C8+\n"); + + hsw_restore_lcpll(dev_priv); + hsw_pc8_restore_interrupts(dev); + lpt_init_pch_refclk(dev); + + if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) { + val = I915_READ(SOUTH_DSPCLK_GATE_D); + val |= PCH_LP_PARTITION_LEVEL_DISABLE; + I915_WRITE(SOUTH_DSPCLK_GATE_D, val); + } + + intel_prepare_ddi(dev); + i915_gem_init_swizzling(dev); + mutex_lock(&dev_priv->rps.hw_lock); + gen6_update_ring_freq(dev); + mutex_unlock(&dev_priv->rps.hw_lock); + dev_priv->pc8.enabled = false; +} + +void hsw_enable_package_c8(struct drm_i915_private *dev_priv) +{ + mutex_lock(&dev_priv->pc8.lock); + __hsw_enable_package_c8(dev_priv); + mutex_unlock(&dev_priv->pc8.lock); +} + +void hsw_disable_package_c8(struct drm_i915_private *dev_priv) +{ + mutex_lock(&dev_priv->pc8.lock); + __hsw_disable_package_c8(dev_priv); + mutex_unlock(&dev_priv->pc8.lock); +} + +static bool hsw_can_enable_package_c8(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = dev_priv->dev; + struct intel_crtc *crtc; + uint32_t val; + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) + if (crtc->base.enabled) + return false; + + /* This case is still possible since we have the i915.disable_power_well + * parameter and also the KVMr or something else might be requesting the + * power well. */ + val = I915_READ(HSW_PWR_WELL_DRIVER); + if (val != 0) { + DRM_DEBUG_KMS("Not enabling PC8: power well on\n"); + return false; + } + + return true; +} + +/* Since we're called from modeset_global_resources there's no way to + * symmetrically increase and decrease the refcount, so we use + * dev_priv->pc8.requirements_met to track whether we already have the refcount + * or not. 
+ */ +static void hsw_update_package_c8(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + bool allow; + + if (!i915_enable_pc8) + return; + + mutex_lock(&dev_priv->pc8.lock); + + allow = hsw_can_enable_package_c8(dev_priv); + + if (allow == dev_priv->pc8.requirements_met) + goto done; + + dev_priv->pc8.requirements_met = allow; + + if (allow) + __hsw_enable_package_c8(dev_priv); + else + __hsw_disable_package_c8(dev_priv); + +done: + mutex_unlock(&dev_priv->pc8.lock); +} + +static void hsw_package_c8_gpu_idle(struct drm_i915_private *dev_priv) +{ + if (!dev_priv->pc8.gpu_idle) { + dev_priv->pc8.gpu_idle = true; + hsw_enable_package_c8(dev_priv); + } +} + +static void hsw_package_c8_gpu_busy(struct drm_i915_private *dev_priv) +{ + if (dev_priv->pc8.gpu_idle) { + dev_priv->pc8.gpu_idle = false; + hsw_disable_package_c8(dev_priv); + } +} + static void haswell_modeset_global_resources(struct drm_device *dev) { bool enable = false; @@ -6079,6 +6239,8 @@ static void haswell_modeset_global_resources(struct drm_device *dev) } intel_set_power_well(dev, enable); + + hsw_update_package_c8(dev); } static int haswell_crtc_mode_set(struct drm_crtc *crtc, @@ -7310,13 +7472,19 @@ static void intel_decrease_pllclock(struct drm_crtc *crtc) void intel_mark_busy(struct drm_device *dev) { - i915_update_gfx_val(dev->dev_private); + struct drm_i915_private *dev_priv = dev->dev_private; + + hsw_package_c8_gpu_busy(dev_priv); + i915_update_gfx_val(dev_priv); } void intel_mark_idle(struct drm_device *dev) { + struct drm_i915_private *dev_priv = dev->dev_private; struct drm_crtc *crtc; + hsw_package_c8_gpu_idle(dev_priv); + if (!i915_powersave) return; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 2726d4d41722..2151d13772b8 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -344,6 +344,8 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, else precharge = 5; + intel_aux_display_runtime_get(dev_priv); + /* Try to wait for any previous AUX channel activity */ for (try = 0; try < 3; try++) { status = I915_READ_NOTRACE(ch_ctl); @@ -434,6 +436,7 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, ret = recv_bytes; out: pm_qos_update_request(&dev_priv->pm_qos, PM_QOS_DEFAULT_VALUE); + intel_aux_display_runtime_put(dev_priv); return ret; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 8222f2426b47..176080822a74 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -745,6 +745,7 @@ extern void intel_set_power_well(struct drm_device *dev, bool enable); extern void intel_enable_gt_powersave(struct drm_device *dev); extern void intel_disable_gt_powersave(struct drm_device *dev); extern void ironlake_teardown_rc6(struct drm_device *dev); +void gen6_update_ring_freq(struct drm_device *dev); extern bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe); @@ -784,5 +785,12 @@ extern void ilk_disable_gt_irq(struct drm_i915_private *dev_priv, extern void snb_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask); extern void snb_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask); +extern void hsw_enable_pc8_work(struct work_struct *__work); +extern void hsw_enable_package_c8(struct drm_i915_private *dev_priv); +extern void hsw_disable_package_c8(struct drm_i915_private *dev_priv); +extern void hsw_pc8_disable_interrupts(struct drm_device *dev); +extern void hsw_pc8_restore_interrupts(struct drm_device *dev); +extern void 
intel_aux_display_runtime_get(struct drm_i915_private *dev_priv); +extern void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv); #endif /* __INTEL_DRV_H__ */ diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index 639fe192997c..d1c1e0f7f262 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -398,6 +398,7 @@ gmbus_xfer(struct i2c_adapter *adapter, int i, reg_offset; int ret = 0; + intel_aux_display_runtime_get(dev_priv); mutex_lock(&dev_priv->gmbus_mutex); if (bus->force_bit) { @@ -497,6 +498,7 @@ timeout: out: mutex_unlock(&dev_priv->gmbus_mutex); + intel_aux_display_runtime_put(dev_priv); return ret; } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index cbab95dce352..0150ba598bf0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3607,7 +3607,7 @@ static void gen6_enable_rps(struct drm_device *dev) gen6_gt_force_wake_put(dev_priv); } -static void gen6_update_ring_freq(struct drm_device *dev) +void gen6_update_ring_freq(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; int min_freq = 15; @@ -5398,6 +5398,17 @@ void intel_init_power_well(struct drm_device *dev) I915_WRITE(HSW_PWR_WELL_BIOS, 0); } +/* Disables PC8 so we can use the GMBUS and DP AUX interrupts. */ +void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv) +{ + hsw_disable_package_c8(dev_priv); +} + +void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv) +{ + hsw_enable_package_c8(dev_priv); +} + /* Set up chip specific power management-related functions */ void intel_init_pm(struct drm_device *dev) { From 371db66add2ef701abd3f4295c4cd6bbc24cd5ca Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Mon, 19 Aug 2013 13:18:10 -0300 Subject: [PATCH 57/62] drm/i915: add i915_pc8_status debugfs file Make it print the value of the variables on the PC8 struct. v2: Update to recent renames and add the new fields. 
Signed-off-by: Paulo Zanoni Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 236d97e51c3a..39df30e7d9af 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1788,6 +1788,31 @@ static int i915_energy_uJ(struct seq_file *m, void *data) power *= units; seq_printf(m, "%llu", (long long unsigned)power); + + return 0; +} + +static int i915_pc8_status(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + if (!IS_HASWELL(dev)) { + seq_puts(m, "not supported\n"); + return 0; + } + + mutex_lock(&dev_priv->pc8.lock); + seq_printf(m, "Requirements met: %s\n", + yesno(dev_priv->pc8.requirements_met)); + seq_printf(m, "GPU idle: %s\n", yesno(dev_priv->pc8.gpu_idle)); + seq_printf(m, "Disable count: %d\n", dev_priv->pc8.disable_count); + seq_printf(m, "IRQs disabled: %s\n", + yesno(dev_priv->pc8.irqs_disabled)); + seq_printf(m, "Enabled: %s\n", yesno(dev_priv->pc8.enabled)); + mutex_unlock(&dev_priv->pc8.lock); + return 0; } @@ -2231,6 +2256,7 @@ static struct drm_info_list i915_debugfs_list[] = { {"i915_llc", i915_llc, 0}, {"i915_edp_psr_status", i915_edp_psr_status, 0}, {"i915_energy_uJ", i915_energy_uJ, 0}, + {"i915_pc8_status", i915_pc8_status, 0}, }; #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list) From 900587453219f6090a1e28db1bb790aa64820131 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Mon, 19 Aug 2013 13:18:11 -0300 Subject: [PATCH 58/62] drm/i915: add i915.pc8_timeout function We currently only enter PC8+ after all its required conditions are met, there's no rendering, and we stay like that for at least 5 seconds. I chose "5 seconds" because this value is conservative and won't make us enter/leave PC8+ thousands of times after the screen is off: some desktop environments have applications that wake up and do rendering every 1-3 seconds, even when the screen is off and the machine is completely idle. But when I was testing my PC8+ patches I set the default value to 100ms so I could use the bad-behaving desktop environments to stress-test my patches. I also thought it would be a good idea to ask our power management team to test different values, but I'm pretty sure they would ask me for an easy way to change the timeout. So to help these 2 cases I decided to create an option that would make it easier to change the default value. I also expect people making specific products that use our driver could try to find the perfect timeout for them. Anyway, fixing the bad-behaving applications will always lead to better power savings than just changing the timeout value: you need to stop waking the Kernel, not quickly put it back to sleep again after you wake it for nothing. Bad sleep leads to bad mood! 
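As a usage note (editorial, not part of the patch): since the parameter is created with module_param_named(..., 0600), it should also be adjustable at runtime through sysfs, in addition to the usual i915.pc8_timeout=<msecs> boot option. A minimal sketch, assuming the standard /sys/module/i915/parameters/ layout:

        #include <stdio.h>

        int main(void)
        {
                /* Lower the idle timeout to 100ms, the value used for stress-testing
                 * above (needs root because of the 0600 permissions). */
                FILE *f = fopen("/sys/module/i915/parameters/pc8_timeout", "w");

                if (!f) {
                        perror("pc8_timeout");
                        return 1;
                }
                fprintf(f, "100\n");
                return fclose(f) ? 1 : 0;
        }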
Signed-off-by: Paulo Zanoni Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 4 ++++ drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_display.c | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 84d48b82e3f1..6dc00a190669 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -145,6 +145,10 @@ int i915_enable_pc8 __read_mostly = 0; module_param_named(enable_pc8, i915_enable_pc8, int, 0600); MODULE_PARM_DESC(enable_pc8, "Enable support for low power package C states (PC8+) (default: false)"); +int i915_pc8_timeout __read_mostly = 5000; +module_param_named(pc8_timeout, i915_pc8_timeout, int, 0600); +MODULE_PARM_DESC(pc8_timeout, "Number of msecs of idleness required to enter PC8+ (default: 5000)"); + bool i915_prefault_disable __read_mostly; module_param_named(prefault_disable, i915_prefault_disable, bool, 0600); MODULE_PARM_DESC(prefault_disable, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 627c8216db1f..5f8a638c5145 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1707,6 +1707,7 @@ extern int i915_disable_power_well __read_mostly; extern int i915_enable_ips __read_mostly; extern bool i915_fastboot __read_mostly; extern int i915_enable_pc8 __read_mostly; +extern int i915_pc8_timeout __read_mostly; extern bool i915_prefault_disable __read_mostly; extern int i915_suspend(struct drm_device *dev, pm_message_t state); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1fad9de3d810..b29954ac6bfd 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6101,7 +6101,7 @@ static void __hsw_enable_package_c8(struct drm_i915_private *dev_priv) return; schedule_delayed_work(&dev_priv->pc8.enable_work, - msecs_to_jiffies(5 * 1000)); + msecs_to_jiffies(i915_pc8_timeout)); } static void __hsw_disable_package_c8(struct drm_i915_private *dev_priv) From e27e9708c45879f16fb824a2da94cd65e150a0c8 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Mon, 19 Aug 2013 13:18:12 -0300 Subject: [PATCH 59/62] drm/i915: enable Package C8+ by default This should be working, so enable it by default. It is also easy to revert. v2: Rebase, s/allow/enable/.
Signed-off-by: Paulo Zanoni Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 6dc00a190669..beb295634a49 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -141,9 +141,9 @@ module_param_named(fastboot, i915_fastboot, bool, 0600); MODULE_PARM_DESC(fastboot, "Try to skip unnecessary mode sets at boot time " "(default: false)"); -int i915_enable_pc8 __read_mostly = 0; +int i915_enable_pc8 __read_mostly = 1; module_param_named(enable_pc8, i915_enable_pc8, int, 0600); -MODULE_PARM_DESC(enable_pc8, "Enable support for low power package C states (PC8+) (default: false)"); +MODULE_PARM_DESC(enable_pc8, "Enable support for low power package C states (PC8+) (default: true)"); int i915_pc8_timeout __read_mostly = 5000; module_param_named(pc8_timeout, i915_pc8_timeout, int, 0600); From 35d8f2eb259e2d32c4bb67e9733ba0cba031f64f Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 21 Aug 2013 23:38:08 +0200 Subject: [PATCH 60/62] drm/i915: Use POSTING_READ in lcpll code If we don't use the return value of an mmio read, our coding style is to use the POSTING_READ macro. This avoids cluttering the mmio traces. While at it, add the missing posting read in the lcpll enable function that Paulo spotted. v2: Drop the _NOTRACE changes; tracing such wait_for loops in the modeset code might actually be rather useful! Cc: Paulo Zanoni Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b29954ac6bfd..b4daa640a6d8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6036,13 +6036,14 @@ void hsw_restore_lcpll(struct drm_i915_private *dev_priv) if (val & LCPLL_POWER_DOWN_ALLOW) { val &= ~LCPLL_POWER_DOWN_ALLOW; I915_WRITE(LCPLL_CTL, val); + POSTING_READ(LCPLL_CTL); } val = I915_READ(D_COMP); val |= D_COMP_COMP_FORCE; val &= ~D_COMP_COMP_DISABLE; I915_WRITE(D_COMP, val); - I915_READ(D_COMP); + POSTING_READ(D_COMP); val = I915_READ(LCPLL_CTL); val &= ~LCPLL_PLL_DISABLE; From e8016055335687b90e7cd5bbfa30e0c269417f34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 22 Aug 2013 19:23:13 +0300 Subject: [PATCH 61/62] drm/i915: Fix context size calculation on SNB/IVB/VLV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All the different context sizes reported in the CXT_SIZE register aren't meant to be simply added together. While BSpec is somewhat unclear on the topic of the actual context size, empirical tests have now revealed the truth. So let's add a big fat comment to remind people how it all works. As a result of correctly interpreting CXT_SIZE, the IVB context size is reduced from three pages to two, while the SNB context size remains at two pages.
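For reference, the driver treats the CXT_SIZE / GEN7_CXT_SIZE fields as counts of 64-byte cachelines, and the consumer of the *_CXT_TOTAL_SIZE totals is get_context_size() in i915_gem_context.c, whose result is then rounded up to whole pages for the actual context object. A simplified sketch of that consumer (not part of this patch; the Haswell special case is left out):

static int get_context_size(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;
	u32 reg;

	switch (INTEL_INFO(dev)->gen) {
	case 6:
		/* fields are 64-byte cacheline counts, hence the multiply */
		reg = I915_READ(CXT_SIZE);
		ret = GEN6_CXT_TOTAL_SIZE(reg) * 64;
		break;
	case 7:
		reg = I915_READ(GEN7_CXT_SIZE);
		ret = GEN7_CXT_TOTAL_SIZE(reg) * 64;
		break;
	default:
		BUG();
	}

	/* The context allocation rounds this up to whole pages, which is
	 * where the IVB "three pages to two" reduction mentioned above
	 * becomes visible. */
	return ret;
}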
Signed-off-by: Ville Syrjälä Acked-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c4c509895826..76d965c38d7e 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1705,15 +1705,26 @@ */ #define CCID 0x2180 #define CCID_EN (1<<0) +/* + * Notes on SNB/IVB/VLV context size: + * - Power context is saved elsewhere (LLC or stolen) + * - Ring/execlist context is saved on SNB, not on IVB + * - Extended context size already includes render context size + * - We always need to follow the extended context size. + * SNB BSpec has comments indicating that we should use the + * render context size instead if execlists are disabled, but + * based on empirical testing that's just nonsense. + * - Pipelined/VF state is saved on SNB/IVB respectively + * - GT1 size just indicates how much of render context + * doesn't need saving on GT1 + */ #define CXT_SIZE 0x21a0 #define GEN6_CXT_POWER_SIZE(cxt_reg) ((cxt_reg >> 24) & 0x3f) #define GEN6_CXT_RING_SIZE(cxt_reg) ((cxt_reg >> 18) & 0x3f) #define GEN6_CXT_RENDER_SIZE(cxt_reg) ((cxt_reg >> 12) & 0x3f) #define GEN6_CXT_EXTENDED_SIZE(cxt_reg) ((cxt_reg >> 6) & 0x3f) #define GEN6_CXT_PIPELINE_SIZE(cxt_reg) ((cxt_reg >> 0) & 0x3f) -#define GEN6_CXT_TOTAL_SIZE(cxt_reg) (GEN6_CXT_POWER_SIZE(cxt_reg) + \ - GEN6_CXT_RING_SIZE(cxt_reg) + \ - GEN6_CXT_RENDER_SIZE(cxt_reg) + \ +#define GEN6_CXT_TOTAL_SIZE(cxt_reg) (GEN6_CXT_RING_SIZE(cxt_reg) + \ GEN6_CXT_EXTENDED_SIZE(cxt_reg) + \ GEN6_CXT_PIPELINE_SIZE(cxt_reg)) #define GEN7_CXT_SIZE 0x21a8 @@ -1723,11 +1734,7 @@ #define GEN7_CXT_EXTENDED_SIZE(ctx_reg) ((ctx_reg >> 9) & 0x7f) #define GEN7_CXT_GT1_SIZE(ctx_reg) ((ctx_reg >> 6) & 0x7) #define GEN7_CXT_VFSTATE_SIZE(ctx_reg) ((ctx_reg >> 0) & 0x3f) -#define GEN7_CXT_TOTAL_SIZE(ctx_reg) (GEN7_CXT_POWER_SIZE(ctx_reg) + \ - GEN7_CXT_RING_SIZE(ctx_reg) + \ - GEN7_CXT_RENDER_SIZE(ctx_reg) + \ - GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \ - GEN7_CXT_GT1_SIZE(ctx_reg) + \ +#define GEN7_CXT_TOTAL_SIZE(ctx_reg) (GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \ GEN7_CXT_VFSTATE_SIZE(ctx_reg)) /* Haswell does have the CXT_SIZE register however it does not appear to be * valid. Now, docs explain in dwords what is in the context object. The full From fb1ae911f4e58c2cf28fcd48b59f54d17283da07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 22 Aug 2013 19:21:30 +0300 Subject: [PATCH 62/62] drm/i915: Print seqnos as unsigned in debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I don't like seeing signed seqnos. Make them unsigned. Signed-off-by: Ville Syrjälä Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 39df30e7d9af..55ab9246e1b9 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -100,7 +100,7 @@ static void describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) { struct i915_vma *vma; - seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %d %d %d%s%s%s", + seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %u %u %u%s%s%s", &obj->base, get_pin_flag(obj), get_tiling_flag(obj),