From 460822b0b1a77db859b0320469799fa4dbe4d367 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Tue, 3 Feb 2015 15:48:17 +0100
Subject: [PATCH 1/9] drm/i915: Prevent use-after-free in
 invalidate_range_start callback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It's possible for invalidate_range_start mmu notifier callback to race
against userptr object release. If the gem object was released prior to
obtaining the spinlock in invalidate_range_start we're hitting null
pointer dereference.

Testcase: igt/gem_userptr_blits/stress-mm-invalidate-close
Testcase: igt/gem_userptr_blits/stress-mm-invalidate-close-overlap
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@vger.kernel.org
[Jani: added code comment suggested by Chris]
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_userptr.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index d182058383a9..1719078c763a 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -113,7 +113,10 @@ static void *invalidate_range__linear(struct i915_mmu_notifier *mn,
 			continue;
 
 		obj = mo->obj;
-		drm_gem_object_reference(&obj->base);
+
+		if (!kref_get_unless_zero(&obj->base.refcount))
+			continue;
+
 		spin_unlock(&mn->lock);
 
 		cancel_userptr(obj);
@@ -149,7 +152,20 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 			it = interval_tree_iter_first(&mn->objects, start, end);
 		if (it != NULL) {
 			obj = container_of(it, struct i915_mmu_object, it)->obj;
-			drm_gem_object_reference(&obj->base);
+
+			/* The mmu_object is released late when destroying the
+			 * GEM object so it is entirely possible to gain a
+			 * reference on an object in the process of being freed
+			 * since our serialisation is via the spinlock and not
+			 * the struct_mutex - and consequently use it after it
+			 * is freed and then double free it.
+			 */
+			if (!kref_get_unless_zero(&obj->base.refcount)) {
+				spin_unlock(&mn->lock);
+				serial = 0;
+				continue;
+			}
+
 			serial = mn->serial;
 		}
 		spin_unlock(&mn->lock);

From d180d2bbb66579e3bf449642b8ec2a76f4014fcd Mon Sep 17 00:00:00 2001
From: Shobhit Kumar <shobhit.kumar@intel.com>
Date: Thu, 5 Feb 2015 17:10:56 +0530
Subject: [PATCH 2/9] drm/i915: Correct the IOSF Dev_FN field for IOSF
 transfers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As per the specififcation, the SB_DevFn is the PCI_DEVFN of the target
device and not the source. So PCI_DEVFN(2,0) is not correct. Further the
port ID should be enough to identify devices unless they are MFD. The
SB_DevFn was intended to remove ambiguity in case of these MFD devices.

For non MFD devices the recommendation for the target device IP was to
ignore these fields, but not all of them followed the recommendation.
Some like CCK ignore these fields and hence PCI_DEVFN(2, 0) works and so
does PCI_DEVFN(0, 0) as it works for DPIO. The issue came to light because
of GPIONC which was not getting programmed correctly with PCI_DEVFN(2, 0).
It turned out that this did not follow the recommendation and expected 0
in this field.

In general the recommendation is to use SB_DevFn as PCI_DEVFN(0, 0) for
all devices except target PCI devices.

Signed-off-by: Shobhit Kumar <shobhit.kumar@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_sideband.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index 3c42eeffa3cb..693ce8281970 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -82,7 +82,7 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr)
 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
 	mutex_lock(&dev_priv->dpio_lock);
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_PUNIT,
+	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
 			SB_CRRDDA_NP, addr, &val);
 	mutex_unlock(&dev_priv->dpio_lock);
 
@@ -94,7 +94,7 @@ void vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val)
 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
 	mutex_lock(&dev_priv->dpio_lock);
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_PUNIT,
+	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
 			SB_CRWRDA_NP, addr, &val);
 	mutex_unlock(&dev_priv->dpio_lock);
 }
@@ -103,7 +103,7 @@ u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg)
 {
 	u32 val = 0;
 
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_BUNIT,
+	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_BUNIT,
 			SB_CRRDDA_NP, reg, &val);
 
 	return val;
@@ -111,7 +111,7 @@ u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg)
 
 void vlv_bunit_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
 {
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_BUNIT,
+	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_BUNIT,
 			SB_CRWRDA_NP, reg, &val);
 }
 
@@ -122,7 +122,7 @@ u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr)
 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
 	mutex_lock(&dev_priv->dpio_lock);
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_NC,
+	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_NC,
 			SB_CRRDDA_NP, addr, &val);
 	mutex_unlock(&dev_priv->dpio_lock);
 
@@ -132,56 +132,56 @@ u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr)
 u32 vlv_gpio_nc_read(struct drm_i915_private *dev_priv, u32 reg)
 {
 	u32 val = 0;
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_GPIO_NC,
+	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_GPIO_NC,
 			SB_CRRDDA_NP, reg, &val);
 	return val;
 }
 
 void vlv_gpio_nc_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
 {
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_GPIO_NC,
+	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_GPIO_NC,
 			SB_CRWRDA_NP, reg, &val);
 }
 
 u32 vlv_cck_read(struct drm_i915_private *dev_priv, u32 reg)
 {
 	u32 val = 0;
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_CCK,
+	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_CCK,
 			SB_CRRDDA_NP, reg, &val);
 	return val;
 }
 
 void vlv_cck_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
 {
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_CCK,
+	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_CCK,
 			SB_CRWRDA_NP, reg, &val);
 }
 
 u32 vlv_ccu_read(struct drm_i915_private *dev_priv, u32 reg)
 {
 	u32 val = 0;
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_CCU,
+	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_CCU,
 			SB_CRRDDA_NP, reg, &val);
 	return val;
 }
 
 void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
 {
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_CCU,
+	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_CCU,
 			SB_CRWRDA_NP, reg, &val);
 }
 
 u32 vlv_gps_core_read(struct drm_i915_private *dev_priv, u32 reg)
 {
 	u32 val = 0;
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_GPS_CORE,
+	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_GPS_CORE,
 			SB_CRRDDA_NP, reg, &val);
 	return val;
 }
 
 void vlv_gps_core_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
 {
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(2, 0), IOSF_PORT_GPS_CORE,
+	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_GPS_CORE,
 			SB_CRWRDA_NP, reg, &val);
 }
 

From ebbc7546d2099c94ff2ea940ae5ce740e512a66d Mon Sep 17 00:00:00 2001
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Date: Thu, 5 Feb 2015 18:41:48 +0200
Subject: [PATCH 3/9] drm/i915: Take runtime pm reference on hangcheck_info

We read the coherent current seqno and actual head from ring.
For hardware access we need to take runtime_pm reference.

Get hardware specific values with runtime reference held
and print them first to emphasize hw state vs bookkeepping.

v2: Reorder output according to hw access (Chris)
    remove superfluous locking (Daniel)

Testcase: igt/pm_rpm/debugfs-read
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88910
Tested-by: Ding Heng <hengx.ding@intel.com> (v1)
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 211d4949a675..96e811fe24ca 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1223,8 +1223,11 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 static int i915_hangcheck_info(struct seq_file *m, void *unused)
 {
 	struct drm_info_node *node = m->private;
-	struct drm_i915_private *dev_priv = to_i915(node->minor->dev);
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring;
+	u64 acthd[I915_NUM_RINGS];
+	u32 seqno[I915_NUM_RINGS];
 	int i;
 
 	if (!i915.enable_hangcheck) {
@@ -1232,6 +1235,15 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 		return 0;
 	}
 
+	intel_runtime_pm_get(dev_priv);
+
+	for_each_ring(ring, dev_priv, i) {
+		seqno[i] = ring->get_seqno(ring, false);
+		acthd[i] = intel_ring_get_active_head(ring);
+	}
+
+	intel_runtime_pm_put(dev_priv);
+
 	if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) {
 		seq_printf(m, "Hangcheck active, fires in %dms\n",
 			   jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires -
@@ -1242,14 +1254,14 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	for_each_ring(ring, dev_priv, i) {
 		seq_printf(m, "%s:\n", ring->name);
 		seq_printf(m, "\tseqno = %x [current %x]\n",
-			   ring->hangcheck.seqno, ring->get_seqno(ring, false));
-		seq_printf(m, "\taction = %d\n", ring->hangcheck.action);
-		seq_printf(m, "\tscore = %d\n", ring->hangcheck.score);
+			   ring->hangcheck.seqno, seqno[i]);
 		seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
 			   (long long)ring->hangcheck.acthd,
-			   (long long)intel_ring_get_active_head(ring));
+			   (long long)acthd[i]);
 		seq_printf(m, "\tmax ACTHD = 0x%08llx\n",
 			   (long long)ring->hangcheck.max_acthd);
+		seq_printf(m, "\tscore = %d\n", ring->hangcheck.score);
+		seq_printf(m, "\taction = %d\n", ring->hangcheck.action);
 	}
 
 	return 0;

From 3225b2f95dbb9981be9e2002e49cd8abf0d8d01a Mon Sep 17 00:00:00 2001
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Date: Thu, 5 Feb 2015 17:45:42 +0200
Subject: [PATCH 4/9] drm/i915: Squelch overzealous uncore reset WARN_ON

We added this WARN_ON to guard against using uninitialized
forcewake domains. But forgot blissfully that not all
gens have forcewake domains in the first place.

v2: Move WARN_ON to fw_domains_init (Chris)

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88911
Tested-by: Ding Heng <hengx.ding@intel.com> (v1)
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
[Jani: add comment above WARN_ON as suggested by Chris]
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_uncore.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 76b60a3538b2..c47a3baa53d5 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -166,7 +166,8 @@ fw_domains_reset(struct drm_i915_private *dev_priv, enum forcewake_domains fw_do
 	struct intel_uncore_forcewake_domain *d;
 	enum forcewake_domain_id id;
 
-	WARN_ON(dev_priv->uncore.fw_domains == 0);
+	if (dev_priv->uncore.fw_domains == 0)
+		return;
 
 	for_each_fw_domain_mask(d, fw_domains, dev_priv, id)
 		fw_domain_reset(d);
@@ -997,6 +998,9 @@ static void intel_uncore_fw_domains_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
+	if (INTEL_INFO(dev_priv->dev)->gen <= 5)
+		return;
+
 	if (IS_GEN9(dev)) {
 		dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
 		dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
@@ -1069,6 +1073,9 @@ static void intel_uncore_fw_domains_init(struct drm_device *dev)
 		fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
 			       FORCEWAKE, FORCEWAKE_ACK);
 	}
+
+	/* All future platforms are expected to require complex power gating */
+	WARN_ON(dev_priv->uncore.fw_domains == 0);
 }
 
 void intel_uncore_init(struct drm_device *dev)

From 0ca09685546fed5fc8f0535204f0626f352140f4 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Mon, 24 Nov 2014 16:54:11 +0100
Subject: [PATCH 5/9] drm/i915: Drop vblank wait from intel_dp_link_down

Nothing in Bspec seems to indicate that we actually needs this, and it
looks like can't work since by this point the pipe is off and so
vblanks won't really happen any more.

Note that Bspec mentions that it takes a vblank for this bit to
change, but _only_ when enabling.

Dropping this code quenches an annoying backtrace introduced by the
more anal checking since

commit 51e31d49c89055299e34b8f44d13f70e19aaaad1
Author: Daniel Vetter <daniel.vetter@ffwll.ch>
Date:   Mon Sep 15 12:36:02 2014 +0200

    drm/i915: Use generic vblank wait

Note: This fixes the fallout from the above commit, but does not address
the shortcomings of the IBX transcoder select workaround implementation
discussed during review [1].

[1] http://mid.gmane.org/87y4o7usxf.fsf@intel.com

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=86095
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Cc: stable@vger.kernel.org # 3.19
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_dp.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index eea9e366a109..d4c82d776fba 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -3521,8 +3521,6 @@ intel_dp_link_down(struct intel_dp *intel_dp)
 	enum port port = intel_dig_port->port;
 	struct drm_device *dev = intel_dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc =
-		to_intel_crtc(intel_dig_port->base.base.crtc);
 	uint32_t DP = intel_dp->DP;
 
 	if (WARN_ON(HAS_DDI(dev)))
@@ -3547,8 +3545,6 @@ intel_dp_link_down(struct intel_dp *intel_dp)
 
 	if (HAS_PCH_IBX(dev) &&
 	    I915_READ(intel_dp->output_reg) & DP_PIPEB_SELECT) {
-		struct drm_crtc *crtc = intel_dig_port->base.base.crtc;
-
 		/* Hardware workaround: leaving our transcoder select
 		 * set to transcoder B while it's off will prevent the
 		 * corresponding HDMI output on transcoder A.
@@ -3559,18 +3555,7 @@ intel_dp_link_down(struct intel_dp *intel_dp)
 		 */
 		DP &= ~DP_PIPEB_SELECT;
 		I915_WRITE(intel_dp->output_reg, DP);
-
-		/* Changes to enable or select take place the vblank
-		 * after being written.
-		 */
-		if (WARN_ON(crtc == NULL)) {
-			/* We should never try to disable a port without a crtc
-			 * attached. For paranoia keep the code around for a
-			 * bit. */
-			POSTING_READ(intel_dp->output_reg);
-			msleep(50);
-		} else
-			intel_wait_for_vblank(dev, intel_crtc->pipe);
+		POSTING_READ(intel_dp->output_reg);
 	}
 
 	DP &= ~DP_AUDIO_OUTPUT_ENABLE;

From f0a1fb10e5f79f5aaf8d7e94b9fa6bf2fa9aeebf Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 22 Jan 2015 13:42:00 +0000
Subject: [PATCH 6/9] drm/i915: Insert a command barrier on BLT/BSD cache
 flushes

This looked like an odd regression from

commit ec5cc0f9b019af95e4571a9fa162d94294c8d90b
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu Jun 12 10:28:55 2014 +0100

    drm/i915: Restrict GPU boost to the RCS engine

but in reality it undercovered a much older coherency bug. The issue that
boosting the GPU frequency on the BCS ring was masking was that we could
wake the CPU up after completion of a BCS batch and inspect memory prior
to the write cache being fully evicted. In order to serialise the
breadcrumb interrupt (and so ensure that the CPU's view of memory is
coherent) we need to perform a post-sync operation in the MI_FLUSH_DW.

v2: Fix all the MI_FLUSH_DW (bsd plus the duplication in execlists).

Also fix the invalidate_domains mask in gen8_emit_flush() for ring !=
VCS.

Testcase: gpuX-rcs-gpu-read-after-write
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@vger.kernel.org
Acked-by: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c        | 20 +++++++++++---------
 drivers/gpu/drm/i915/intel_ringbuffer.c | 23 +++++++++++++++++++----
 2 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a94346fee160..0f358c5999ec 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1211,15 +1211,17 @@ static int gen8_emit_flush(struct intel_ringbuffer *ringbuf,
 
 	cmd = MI_FLUSH_DW + 1;
 
-	if (ring == &dev_priv->ring[VCS]) {
-		if (invalidate_domains & I915_GEM_GPU_DOMAINS)
-			cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
-				MI_FLUSH_DW_STORE_INDEX |
-				MI_FLUSH_DW_OP_STOREDW;
-	} else {
-		if (invalidate_domains & I915_GEM_DOMAIN_RENDER)
-			cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
-				MI_FLUSH_DW_OP_STOREDW;
+	/* We always require a command barrier so that subsequent
+	 * commands, such as breadcrumb interrupts, are strictly ordered
+	 * wrt the contents of the write cache being flushed to memory
+	 * (and thus being coherent from the CPU).
+	 */
+	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
+	if (invalidate_domains & I915_GEM_GPU_DOMAINS) {
+		cmd |= MI_INVALIDATE_TLB;
+		if (ring == &dev_priv->ring[VCS])
+			cmd |= MI_INVALIDATE_BSD;
 	}
 
 	intel_logical_ring_emit(ringbuf, cmd);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 0bd3976d88e1..e5b3c6dbd467 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2240,6 +2240,14 @@ static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
 	cmd = MI_FLUSH_DW;
 	if (INTEL_INFO(ring->dev)->gen >= 8)
 		cmd += 1;
+
+	/* We always require a command barrier so that subsequent
+	 * commands, such as breadcrumb interrupts, are strictly ordered
+	 * wrt the contents of the write cache being flushed to memory
+	 * (and thus being coherent from the CPU).
+	 */
+	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
 	/*
 	 * Bspec vol 1c.5 - video engine command streamer:
 	 * "If ENABLED, all TLBs will be invalidated once the flush
@@ -2247,8 +2255,8 @@ static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
 	 * Post-Sync Operation field is a value of 1h or 3h."
 	 */
 	if (invalidate & I915_GEM_GPU_DOMAINS)
-		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
-			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
+
 	intel_ring_emit(ring, cmd);
 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
 	if (INTEL_INFO(ring->dev)->gen >= 8) {
@@ -2344,6 +2352,14 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
 	cmd = MI_FLUSH_DW;
 	if (INTEL_INFO(ring->dev)->gen >= 8)
 		cmd += 1;
+
+	/* We always require a command barrier so that subsequent
+	 * commands, such as breadcrumb interrupts, are strictly ordered
+	 * wrt the contents of the write cache being flushed to memory
+	 * (and thus being coherent from the CPU).
+	 */
+	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
 	/*
 	 * Bspec vol 1c.3 - blitter engine command streamer:
 	 * "If ENABLED, all TLBs will be invalidated once the flush
@@ -2351,8 +2367,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
 	 * Post-Sync Operation field is a value of 1h or 3h."
 	 */
 	if (invalidate & I915_GEM_DOMAIN_RENDER)
-		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
-			MI_FLUSH_DW_OP_STOREDW;
+		cmd |= MI_INVALIDATE_TLB;
 	intel_ring_emit(ring, cmd);
 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
 	if (INTEL_INFO(ring->dev)->gen >= 8) {

From 4ba7d93afee0a2ddef7598f460927d39f33fe98b Mon Sep 17 00:00:00 2001
From: Shobhit Kumar <shobhit.kumar@intel.com>
Date: Thu, 5 Feb 2015 17:08:45 +0530
Subject: [PATCH 7/9] drm/i915: Correct the base value while updating
 LP_OUTPUT_HOLD in MIPI_PORT_CTRL

LP_OUTPUT_HOLD is only in MIPI_PORT_CTRL(PORT_A) even for PORT_C in case
of dual link. In the dual link implementation, the bit is correctly set
or unset for hardcoded PORT_A, but for bit update the register base value
is read by using MIPI_PORT_CTRL(port) in a loop. The second iteration will
read base value from PORT_C and program for PORT_A. Mostly in case of dual
link all other bit values should be same, but logically we should read from
PORT_A. So hardcode to read initial value from PORT_A as well.

Signed-off-by: Shobhit Kumar <shobhit.kumar@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_dsi.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index ef3df5e3d819..10ab68457ca8 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -360,12 +360,11 @@ static void intel_dsi_device_ready(struct intel_encoder *encoder)
 		I915_WRITE(MIPI_DEVICE_READY(port), ULPS_STATE_ENTER);
 		usleep_range(2500, 3000);
 
-		val = I915_READ(MIPI_PORT_CTRL(port));
-
 		/* Enable MIPI PHY transparent latch
 		 * Common bit for both MIPI Port A & MIPI Port C
 		 * No similar bit in MIPI Port C reg
 		 */
+		val = I915_READ(MIPI_PORT_CTRL(PORT_A));
 		I915_WRITE(MIPI_PORT_CTRL(PORT_A), val | LP_OUTPUT_HOLD);
 		usleep_range(1000, 1500);
 
@@ -543,10 +542,10 @@ static void intel_dsi_clear_device_ready(struct intel_encoder *encoder)
 							== 0x00000), 30))
 			DRM_ERROR("DSI LP not going Low\n");
 
-		val = I915_READ(MIPI_PORT_CTRL(port));
 		/* Disable MIPI PHY transparent latch
 		 * Common bit for both MIPI Port A & MIPI Port C
 		 */
+		val = I915_READ(MIPI_PORT_CTRL(PORT_A));
 		I915_WRITE(MIPI_PORT_CTRL(PORT_A), val & ~LP_OUTPUT_HOLD);
 		usleep_range(1000, 1500);
 

From a8b3d52f8bc24b1c1bf7711bcf3e21fe0aaa752c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 10 Feb 2015 14:11:46 +0200
Subject: [PATCH 8/9] drm/i915: Really ignore long HPD pulses on eDP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Return IRQ_HANDLED from intel_dp_hpd_pulse() to properly
ignore the long HPD pulse on eDP to avoid the never ending
VDD off->HPD->VDD on->VDD off->HPD... cycle.

This fixes a regression intoduced by
 commit b2c5c181ed18490648a02f8c7d562a3b9e8b96de
 Author: Daniel Vetter <daniel.vetter@ffwll.ch>
 Date:   Fri Jan 23 06:00:31 2015 +0100

    drm/i915: Use symbolic irqreturn for ->hpd_pulse

Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_dp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index d4c82d776fba..a74aaf9242b9 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -4431,7 +4431,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
 		 */
 		DRM_DEBUG_KMS("ignoring long hpd on eDP port %c\n",
 			      port_name(intel_dig_port->port));
-		return false;
+		return IRQ_HANDLED;
 	}
 
 	DRM_DEBUG_KMS("got hpd irq on port %c - %s\n",

From 46efa4abe5712276494adbce102f46e3214632fd Mon Sep 17 00:00:00 2001
From: Tom O'Rourke <Tom.O'Rourke@intel.com>
Date: Tue, 10 Feb 2015 23:06:46 -0800
Subject: [PATCH 9/9] drm/i915: Clamp efficient frequency to valid range

The efficient frequency (RPe) should stay in the range
RPn <= RPe <= RP0.  The pcode clamps the returned value
internally on Broadwell but not on Haswell.

Fix for missing range check in
commit 93ee29203f506582cca2bcec5f05041526d9ab0a
Author: Tom O'Rourke <Tom.O'Rourke@intel.com>
Date:   Wed Nov 19 14:21:52 2014 -0800

    drm/i915: Use efficient frequency for HSW/BDW

Reference: http://lists.freedesktop.org/archives/intel-gfx/2015-February/059802.html
Reported-by: Michael Auchter <a@phire.org>
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@vger.kernel.org # v3.19
Signed-off-by: Tom O'Rourke <Tom.O'Rourke@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_pm.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 6ece663f3394..24d77ddcc5f4 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4005,7 +4005,10 @@ static void gen6_init_rps_frequencies(struct drm_device *dev)
 					&ddcc_status);
 		if (0 == ret)
 			dev_priv->rps.efficient_freq =
-				(ddcc_status >> 8) & 0xff;
+				clamp_t(u8,
+					((ddcc_status >> 8) & 0xff),
+					dev_priv->rps.min_freq,
+					dev_priv->rps.max_freq);
 	}
 
 	/* Preserve min/max settings in case of re-init */