From 035dc1e0f9008b48630e02bf0eaa7cc547416d1d Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 3 Jul 2013 12:56:54 +0200
Subject: [PATCH 1/9] drm/i915: reinit status page registers after gpu reset

This fixes gpu reset on my gm45 - without this patch the bsd thing is
forever stuck since the seqno updates never reach the status page.

Tbh I have no idea how this ever worked without rewriting the hws
registers after a gpu reset.

To satisfy my OCD also give the functions a bit more consistent names:
- Use status_page everywhere, also for the physical addressed one.
- Use init for the allocation part and setup for the register setup
  part consistently.

Long term I'd really like to share the hw init parts completely
between gpu reset, resume and driver load, i.e. to call
i915_gem_init_hw instead of the individual pieces we might need.

v2: Add the missing paragraph to the commit message about what bug
exactly this patch here fixes.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=65495
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Tested-by: lu hua <huax.lu@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 29 ++++++++++++++++---------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e51ab552046c..18ca76e3e5ef 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -379,6 +379,17 @@ u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
 	return I915_READ(acthd_reg);
 }
 
+static void ring_setup_phys_status_page(struct intel_ring_buffer *ring)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	u32 addr;
+
+	addr = dev_priv->status_page_dmah->busaddr;
+	if (INTEL_INFO(ring->dev)->gen >= 4)
+		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
+	I915_WRITE(HWS_PGA, addr);
+}
+
 static int init_ring_common(struct intel_ring_buffer *ring)
 {
 	struct drm_device *dev = ring->dev;
@@ -390,6 +401,11 @@ static int init_ring_common(struct intel_ring_buffer *ring)
 	if (HAS_FORCE_WAKE(dev))
 		gen6_gt_force_wake_get(dev_priv);
 
+	if (I915_NEED_GFX_HWS(dev))
+		intel_ring_setup_status_page(ring);
+	else
+		ring_setup_phys_status_page(ring);
+
 	/* Stop the ring if it's running. */
 	I915_WRITE_CTL(ring, 0);
 	I915_WRITE_HEAD(ring, 0);
@@ -1223,7 +1239,6 @@ static int init_status_page(struct intel_ring_buffer *ring)
 	ring->status_page.obj = obj;
 	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
 
-	intel_ring_setup_status_page(ring);
 	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
 			ring->name, ring->status_page.gfx_addr);
 
@@ -1237,10 +1252,9 @@ err:
 	return ret;
 }
 
-static int init_phys_hws_pga(struct intel_ring_buffer *ring)
+static int init_phys_status_page(struct intel_ring_buffer *ring)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	u32 addr;
 
 	if (!dev_priv->status_page_dmah) {
 		dev_priv->status_page_dmah =
@@ -1249,11 +1263,6 @@ static int init_phys_hws_pga(struct intel_ring_buffer *ring)
 			return -ENOMEM;
 	}
 
-	addr = dev_priv->status_page_dmah->busaddr;
-	if (INTEL_INFO(ring->dev)->gen >= 4)
-		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
-	I915_WRITE(HWS_PGA, addr);
-
 	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
 	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
 
@@ -1281,7 +1290,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 			return ret;
 	} else {
 		BUG_ON(ring->id != RCS);
-		ret = init_phys_hws_pga(ring);
+		ret = init_phys_status_page(ring);
 		if (ret)
 			return ret;
 	}
@@ -1893,7 +1902,7 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
 	}
 
 	if (!I915_NEED_GFX_HWS(dev)) {
-		ret = init_phys_hws_pga(ring);
+		ret = init_phys_status_page(ring);
 		if (ret)
 			return ret;
 	}

From bf51d5e2cda5d36d98e4b46ac7fca9461e512c41 Mon Sep 17 00:00:00 2001
From: Paulo Zanoni <paulo.r.zanoni@intel.com>
Date: Wed, 3 Jul 2013 17:12:13 -0300
Subject: [PATCH 2/9] drm/i915: switch disable_power_well default value to 1

Now that the audio driver is using our power well API, everything
should be working correctly, so let's give it a try.

Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 062cbda1bf4a..f4af1ca0fb62 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -123,10 +123,10 @@ module_param_named(preliminary_hw_support, i915_preliminary_hw_support, int, 060
 MODULE_PARM_DESC(preliminary_hw_support,
 		"Enable preliminary hardware support. (default: false)");
 
-int i915_disable_power_well __read_mostly = 0;
+int i915_disable_power_well __read_mostly = 1;
 module_param_named(disable_power_well, i915_disable_power_well, int, 0600);
 MODULE_PARM_DESC(disable_power_well,
-		 "Disable the power well when possible (default: false)");
+		 "Disable the power well when possible (default: true)");
 
 int i915_enable_ips __read_mostly = 1;
 module_param_named(enable_ips, i915_enable_ips, int, 0600);

From 067556084a0e412013af6b0250a3143ae5afde6d Mon Sep 17 00:00:00 2001
From: Xiong Zhang <xiong.y.zhang@intel.com>
Date: Fri, 5 Jul 2013 18:53:29 +0800
Subject: [PATCH 3/9] drm/i915: Correct obj->mm_list link to
 dev_priv->dev_priv->mm.inactive_list

obj->mm_list link to dev_priv->mm.inactive_list/active_list
obj->global_list link to dev_priv->mm.unbound_list/bound_list

This regression has been introduced in

commit 93927ca52a55c23e0a6a305e7e9082e8411ac9fa
Author: Daniel Vetter <daniel.vetter@ffwll.ch>
Date:   Thu Jan 10 18:03:00 2013 +0100

    drm/i915: Revert shrinker changes from "Track unbound pages"

Cc: stable@vger.kernel.org
Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.com>
[danvet: Add regression notice.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 769f75262feb..7f368d79f7d2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4623,7 +4623,7 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
 	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
 		if (obj->pages_pin_count == 0)
 			cnt += obj->base.size >> PAGE_SHIFT;
-	list_for_each_entry(obj, &dev_priv->mm.inactive_list, global_list)
+	list_for_each_entry(obj, &dev_priv->mm.inactive_list, mm_list)
 		if (obj->pin_count == 0 && obj->pages_pin_count == 0)
 			cnt += obj->base.size >> PAGE_SHIFT;
 

From aaf8a5167291b65e9116cb8736d862965b57c13a Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Fri, 5 Jul 2013 23:39:50 +0200
Subject: [PATCH 4/9] drm/i915: fix up ring cleanup for the i830/i845 CS tlb
 w/a

It's not a good idea to also run the pipe_control cleanup.

This regression has been introduced whith the original cs tlb w/a in

commit b45305fce5bb1abec263fcff9d81ebecd6306ede
Author: Daniel Vetter <daniel.vetter@ffwll.ch>
Date:   Mon Dec 17 16:21:27 2012 +0100

    drm/i915: Implement workaround for broken CS tlb on i830/845

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=64610
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@vger.kernel.org
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 18ca76e3e5ef..664118d8c1d6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -534,9 +534,6 @@ cleanup_pipe_control(struct intel_ring_buffer *ring)
 	struct pipe_control *pc = ring->private;
 	struct drm_i915_gem_object *obj;
 
-	if (!ring->private)
-		return;
-
 	obj = pc->obj;
 
 	kunmap(sg_page(obj->pages->sgl));
@@ -544,7 +541,6 @@ cleanup_pipe_control(struct intel_ring_buffer *ring)
 	drm_gem_object_unreference(&obj->base);
 
 	kfree(pc);
-	ring->private = NULL;
 }
 
 static int init_render_ring(struct intel_ring_buffer *ring)
@@ -617,7 +613,10 @@ static void render_ring_cleanup(struct intel_ring_buffer *ring)
 	if (HAS_BROKEN_CS_TLB(dev))
 		drm_gem_object_unreference(to_gem_object(ring->private));
 
-	cleanup_pipe_control(ring);
+	if (INTEL_INFO(dev)->gen >= 5)
+		cleanup_pipe_control(ring);
+
+	ring->private = NULL;
 }
 
 static void

From d4eead50eb206b875f54f66cc0f6ec7d54122c28 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 9 Jul 2013 17:05:26 +0300
Subject: [PATCH 5/9] drm/i915: fix lane bandwidth capping for DP 1.2 sinks

DP 1.2 compatible displays may report a 5.4Gbps maximum bandwidth which
the driver will treat as an invalid value and use 1.62Gbps instead. Fix
this by capping to 2.7Gbps for sinks reporting a 5.4Gbps max bw.

Also add a warning for reserved values.

v2:
- allow only bw values explicitly listed in the DP standard (Daniel,
  Chris)

Signed-off-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/intel_dp.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index b73971234013..26e162bb3a51 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -75,7 +75,12 @@ intel_dp_max_link_bw(struct intel_dp *intel_dp)
 	case DP_LINK_BW_1_62:
 	case DP_LINK_BW_2_7:
 		break;
+	case DP_LINK_BW_5_4: /* 1.2 capable displays may advertise higher bw */
+		max_link_bw = DP_LINK_BW_2_7;
+		break;
 	default:
+		WARN(1, "invalid max DP link bw val %x, using 1.62Gbps\n",
+		     max_link_bw);
 		max_link_bw = DP_LINK_BW_1_62;
 		break;
 	}

From c11e5f35ab490bd30591563816fbc83526521777 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 9 Jul 2013 16:00:31 -0700
Subject: [PATCH 6/9] Partially revert "drm/i915: unconditionally use mt
 forcewake on hsw/ivb"

This patch partially reverts commit 36ec8f877481449bdfa072e6adf2060869e2b970 for
IvyBridge CPUs.

The original commit results in repeated 'Timed out waiting for forcewake old
ack to clear' messages on a Supermicro C7H61 board (BIOS version 2.00 and 2.00b)
with i7-3770K CPU. It ultimately results in a hangup if the system is highly
loaded. Reverting the commit for IvyBridge CPUs fixes the issue.

Issue a warning if the CPU is IvyBridge and mt forcewake is disabled, since
this condition can result in secondary issues.

v2: Only revert patch for Ivybridge CPUs
    Issue info message if mt forcewake is disabled on Ivybridge

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=60541
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=66139
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/intel_pm.c | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index ccbdd83f5220..d10e6735771f 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -5500,9 +5500,38 @@ void intel_gt_init(struct drm_device *dev)
 	if (IS_VALLEYVIEW(dev)) {
 		dev_priv->gt.force_wake_get = vlv_force_wake_get;
 		dev_priv->gt.force_wake_put = vlv_force_wake_put;
-	} else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
+	} else if (IS_HASWELL(dev)) {
 		dev_priv->gt.force_wake_get = __gen6_gt_force_wake_mt_get;
 		dev_priv->gt.force_wake_put = __gen6_gt_force_wake_mt_put;
+	} else if (IS_IVYBRIDGE(dev)) {
+		u32 ecobus;
+
+		/* IVB configs may use multi-threaded forcewake */
+
+		/* A small trick here - if the bios hasn't configured
+		 * MT forcewake, and if the device is in RC6, then
+		 * force_wake_mt_get will not wake the device and the
+		 * ECOBUS read will return zero. Which will be
+		 * (correctly) interpreted by the test below as MT
+		 * forcewake being disabled.
+		 */
+		mutex_lock(&dev->struct_mutex);
+		__gen6_gt_force_wake_mt_get(dev_priv);
+		ecobus = I915_READ_NOTRACE(ECOBUS);
+		__gen6_gt_force_wake_mt_put(dev_priv);
+		mutex_unlock(&dev->struct_mutex);
+
+		if (ecobus & FORCEWAKE_MT_ENABLE) {
+			dev_priv->gt.force_wake_get =
+						__gen6_gt_force_wake_mt_get;
+			dev_priv->gt.force_wake_put =
+						__gen6_gt_force_wake_mt_put;
+		} else {
+			DRM_INFO("No MT forcewake available on Ivybridge, this can result in issues\n");
+			DRM_INFO("when using vblank-synced partial screen updates.\n");
+			dev_priv->gt.force_wake_get = __gen6_gt_force_wake_get;
+			dev_priv->gt.force_wake_put = __gen6_gt_force_wake_put;
+		}
 	} else if (IS_GEN6(dev)) {
 		dev_priv->gt.force_wake_get = __gen6_gt_force_wake_get;
 		dev_priv->gt.force_wake_put = __gen6_gt_force_wake_put;

From 02978ff57a5bdfbf703d2bc5a4d933a53ede3144 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 9 Jul 2013 09:22:39 +0100
Subject: [PATCH 7/9] drm/i915: Fix write-read race with multiple rings

Daniel noticed a problem where is we wrote to an object with ring A in
the middle of a very long running batch, then executed a quick batch on
ring B before a batch that reads from the same object, its obj->ring would
now point to ring B, but its last_write_seqno would be still relative to
ring A. This would allow for the user to read from the object before the
GPU had completed the write, as set_domain would only check that ring B
had passed the last_write_seqno.

To fix this simply (and inelegantly), we bump the last_write_seqno when
switching rings so that the last_write_seqno is always relative to the
current obj->ring.

This fixes igt/tests/gem_write_read_ring_switch.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: stable@vger.kernel.org
[danvet: Add note about the newly created igt which exercises this
bug.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7f368d79f7d2..8fd8e82ebda4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1880,6 +1880,10 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
 	u32 seqno = intel_ring_get_seqno(ring);
 
 	BUG_ON(ring == NULL);
+	if (obj->ring != ring && obj->last_write_seqno) {
+		/* Keep the seqno relative to the current ring */
+		obj->last_write_seqno = seqno;
+	}
 	obj->ring = ring;
 
 	/* Add a reference if we're newly entering the active list. */

From d18b9619034230b6f945e215276425636ca401fe Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 10 Jul 2013 13:36:23 +0100
Subject: [PATCH 8/9] drm/i915: Fix incoherence with fence updates on
 Sandybridge+

This hopefully fixes the root cause behind the workaround added in

commit 25ff1195f8a0b3724541ae7bbe331b4296de9c06
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu Apr 4 21:31:03 2013 +0100

    drm/i915: Workaround incoherence between fences and LLC across multiple CPUs

Thanks to further investigation by Jon Bloomfield, he realised that
the 64-bit register might be broken up by the hardware into two 32-bit
writes (a problem we have encountered elsewhere). This non-atomicity
would then cause an issue where a second thread would see an
intermediate register state (new high dword, old low dword), and this
register would randomly be used in preference to its own thread register.
This would cause the second thread to read from and write into a fairly
random tiled location.  Breaking the operation into 3 explicit 32-bit
updates (first disable the fence, poke the upper bits, then poke the lower
bits and enable) ensures that, given proper serialisation between the
32-bit register write and the memory transfer, that the fence value is
always consistent.

Armed with this knowledge, we can explain how the previous workaround
work. The key to the corruption is that a second thread sees an
erroneous fence register that conflicts and overrides its own. By
serialising the fence update across all CPUs, we have a small window
where no GTT access is occurring and so hide the potential corruption.
This also leads to the conclusion that the earlier workaround was
incomplete.

v2: Be overly paranoid about the order in which fence updates become
visible to the GPU to make really sure that we turn the fence off before
doing the update, and then only switch the fence on afterwards.

Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Carsten Emde <C.Emde@osadl.org>
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8fd8e82ebda4..a34e8e2ba98a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2668,7 +2668,6 @@ static void i965_write_fence_reg(struct drm_device *dev, int reg,
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	int fence_reg;
 	int fence_pitch_shift;
-	uint64_t val;
 
 	if (INTEL_INFO(dev)->gen >= 6) {
 		fence_reg = FENCE_REG_SANDYBRIDGE_0;
@@ -2678,8 +2677,23 @@ static void i965_write_fence_reg(struct drm_device *dev, int reg,
 		fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
 	}
 
+	fence_reg += reg * 8;
+
+	/* To w/a incoherency with non-atomic 64-bit register updates,
+	 * we split the 64-bit update into two 32-bit writes. In order
+	 * for a partial fence not to be evaluated between writes, we
+	 * precede the update with write to turn off the fence register,
+	 * and only enable the fence as the last step.
+	 *
+	 * For extra levels of paranoia, we make sure each step lands
+	 * before applying the next step.
+	 */
+	I915_WRITE(fence_reg, 0);
+	POSTING_READ(fence_reg);
+
 	if (obj) {
 		u32 size = obj->gtt_space->size;
+		uint64_t val;
 
 		val = (uint64_t)((obj->gtt_offset + size - 4096) &
 				 0xfffff000) << 32;
@@ -2688,12 +2702,16 @@ static void i965_write_fence_reg(struct drm_device *dev, int reg,
 		if (obj->tiling_mode == I915_TILING_Y)
 			val |= 1 << I965_FENCE_TILING_Y_SHIFT;
 		val |= I965_FENCE_REG_VALID;
-	} else
-		val = 0;
 
-	fence_reg += reg * 8;
-	I915_WRITE64(fence_reg, val);
-	POSTING_READ(fence_reg);
+		I915_WRITE(fence_reg + 4, val >> 32);
+		POSTING_READ(fence_reg + 4);
+
+		I915_WRITE(fence_reg + 0, val);
+		POSTING_READ(fence_reg);
+	} else {
+		I915_WRITE(fence_reg + 4, 0);
+		POSTING_READ(fence_reg + 4);
+	}
 }
 
 static void i915_write_fence_reg(struct drm_device *dev, int reg,

From 46a0b638f35b45fc13d3dc0deb6a7e17988170b2 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 10 Jul 2013 13:36:24 +0100
Subject: [PATCH 9/9] Revert "drm/i915: Workaround incoherence between fences
 and LLC across multiple CPUs"

This reverts commit 25ff119 and the follow on for Valleyview commit 2dc8aae.

commit 25ff1195f8a0b3724541ae7bbe331b4296de9c06
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu Apr 4 21:31:03 2013 +0100

    drm/i915: Workaround incoherence between fences and LLC across multiple CPUs

commit 2dc8aae06d53458dd3624dc0accd4f81100ee631
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed May 22 17:08:06 2013 +0100

    drm/i915: Workaround incoherence with fence updates on Valleyview

Jon Bloomfield came up with a plausible explanation and cheap fix
(drm/i915: Fix incoherence with fence updates on Sandybridge+) for the
race condition, so lets run with it.

This is a candidate for stable as the old workaround incurs a
significant cost (calling wbinvd on all CPUs before performing the
register write) for some workloads as noted by Carsten Emde.

Link: http://lists.freedesktop.org/archives/intel-gfx/2013-June/028819.html
References: https://www.osadl.org/?id=1543#c7602
References: https://bugs.freedesktop.org/show_bug.cgi?id=63825
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Carsten Emde <C.Emde@osadl.org>
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 45 +++------------------------------
 1 file changed, 3 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a34e8e2ba98a..06d66e09da17 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2829,56 +2829,17 @@ static inline int fence_number(struct drm_i915_private *dev_priv,
 	return fence - dev_priv->fence_regs;
 }
 
-struct write_fence {
-	struct drm_device *dev;
-	struct drm_i915_gem_object *obj;
-	int fence;
-};
-
-static void i915_gem_write_fence__ipi(void *data)
-{
-	struct write_fence *args = data;
-
-	/* Required for SNB+ with LLC */
-	wbinvd();
-
-	/* Required for VLV */
-	i915_gem_write_fence(args->dev, args->fence, args->obj);
-}
-
 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
 					 struct drm_i915_fence_reg *fence,
 					 bool enable)
 {
 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-	struct write_fence args = {
-		.dev = obj->base.dev,
-		.fence = fence_number(dev_priv, fence),
-		.obj = enable ? obj : NULL,
-	};
+	int reg = fence_number(dev_priv, fence);
 
-	/* In order to fully serialize access to the fenced region and
-	 * the update to the fence register we need to take extreme
-	 * measures on SNB+. In theory, the write to the fence register
-	 * flushes all memory transactions before, and coupled with the
-	 * mb() placed around the register write we serialise all memory
-	 * operations with respect to the changes in the tiler. Yet, on
-	 * SNB+ we need to take a step further and emit an explicit wbinvd()
-	 * on each processor in order to manually flush all memory
-	 * transactions before updating the fence register.
-	 *
-	 * However, Valleyview complicates matter. There the wbinvd is
-	 * insufficient and unlike SNB/IVB requires the serialising
-	 * register write. (Note that that register write by itself is
-	 * conversely not sufficient for SNB+.) To compromise, we do both.
-	 */
-	if (INTEL_INFO(args.dev)->gen >= 6)
-		on_each_cpu(i915_gem_write_fence__ipi, &args, 1);
-	else
-		i915_gem_write_fence(args.dev, args.fence, args.obj);
+	i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
 
 	if (enable) {
-		obj->fence_reg = args.fence;
+		obj->fence_reg = reg;
 		fence->obj = obj;
 		list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
 	} else {