-rw-r--r--  drivers/gpu/drm/i915/i915_debugfs.c        |   89
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h            |    5
-rw-r--r--  drivers/gpu/drm/i915/i915_irq.c            |  120
-rw-r--r--  drivers/gpu/drm/i915/i915_params.c         |    5
-rw-r--r--  drivers/gpu/drm/i915/i915_params.h         |    1
-rw-r--r--  drivers/gpu/drm/i915/intel_atomic.c        |   11
-rw-r--r--  drivers/gpu/drm/i915/intel_atomic_plane.c  |    1
-rw-r--r--  drivers/gpu/drm/i915/intel_display.c       | 1590
-rw-r--r--  drivers/gpu/drm/i915/intel_drv.h           |   43
-rw-r--r--  drivers/gpu/drm/i915/intel_fbc.c           |   39
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.c           |    4
11 files changed, 1283 insertions(+), 625 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index b29ba16c90b3..ac7e5692496d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -621,52 +621,6 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data)
 	return 0;
 }
 
-static void i915_dump_pageflip(struct seq_file *m,
-			       struct drm_i915_private *dev_priv,
-			       struct intel_crtc *crtc,
-			       struct intel_flip_work *work)
-{
-	const char pipe = pipe_name(crtc->pipe);
-	u32 pending;
-	int i;
-
-	pending = atomic_read(&work->pending);
-	if (pending) {
-		seq_printf(m, "Flip ioctl preparing on pipe %c (plane %c)\n",
-			   pipe, plane_name(crtc->plane));
-	} else {
-		seq_printf(m, "Flip pending (waiting for vsync) on pipe %c (plane %c)\n",
-			   pipe, plane_name(crtc->plane));
-	}
-
-	for (i = 0; i < work->num_planes; i++) {
-		struct intel_plane_state *old_plane_state = work->old_plane_state[i];
-		struct drm_plane *plane = old_plane_state->base.plane;
-		struct drm_i915_gem_request *req = old_plane_state->wait_req;
-		struct intel_engine_cs *engine;
-
-		seq_printf(m, "[PLANE:%i] part of flip.\n", plane->base.id);
-
-		if (!req) {
-			seq_printf(m, "Plane not associated with any engine\n");
-			continue;
-		}
-
-		engine = i915_gem_request_get_engine(req);
-
-		seq_printf(m, "Plane blocked on %s at seqno %x, next seqno %x [current breadcrumb %x], completed? %d\n",
-			   engine->name,
-			   i915_gem_request_get_seqno(req),
-			   dev_priv->next_seqno,
-			   engine->get_seqno(engine),
-			   i915_gem_request_completed(req, true));
-	}
-
-	seq_printf(m, "Flip queued on frame %d, now %d\n",
-		   pending ? work->flip_queued_vblank : -1,
-		   intel_crtc_get_vblank_counter(crtc));
-}
-
 static int i915_gem_pageflip_info(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = m->private;
@@ -685,13 +639,48 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
 		struct intel_flip_work *work;
 
 		spin_lock_irq(&dev->event_lock);
-		if (list_empty(&crtc->flip_work)) {
+		work = crtc->flip_work;
+		if (work == NULL) {
 			seq_printf(m, "No flip due on pipe %c (plane %c)\n",
 				   pipe, plane);
 		} else {
-			list_for_each_entry(work, &crtc->flip_work, head) {
-				i915_dump_pageflip(m, dev_priv, crtc, work);
-				seq_puts(m, "\n");
+			u32 pending;
+			u32 addr;
+
+			pending = atomic_read(&work->pending);
+			if (pending) {
+				seq_printf(m, "Flip ioctl preparing on pipe %c (plane %c)\n",
+					   pipe, plane);
+			} else {
+				seq_printf(m, "Flip pending (waiting for vsync) on pipe %c (plane %c)\n",
+					   pipe, plane);
+			}
+			if (work->flip_queued_req) {
+				struct intel_engine_cs *engine = i915_gem_request_get_engine(work->flip_queued_req);
+
+				seq_printf(m, "Flip queued on %s at seqno %x, next seqno %x [current breadcrumb %x], completed? %d\n",
+					   engine->name,
+					   i915_gem_request_get_seqno(work->flip_queued_req),
+					   dev_priv->next_seqno,
+					   engine->get_seqno(engine),
+					   i915_gem_request_completed(work->flip_queued_req, true));
+			} else
+				seq_printf(m, "Flip not associated with any ring\n");
+			seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n",
+				   work->flip_queued_vblank,
+				   work->flip_ready_vblank,
+				   intel_crtc_get_vblank_counter(crtc));
+			seq_printf(m, "%d prepares\n", atomic_read(&work->pending));
+
+			if (INTEL_INFO(dev)->gen >= 4)
+				addr = I915_HI_DISPBASE(I915_READ(DSPSURF(crtc->plane)));
+			else
+				addr = I915_READ(DSPADDR(crtc->plane));
+			seq_printf(m, "Current scanout address 0x%08x\n", addr);
+
+			if (work->pending_flip_obj) {
+				seq_printf(m, "New framebuffer address 0x%08lx\n", (long)work->gtt_offset);
+				seq_printf(m, "MMIO update completed? %d\n",  addr == work->gtt_offset);
 			}
 		}
 		spin_unlock_irq(&dev->event_lock);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 85a7c44ed55c..e4c8e341655c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -618,6 +618,11 @@ struct drm_i915_display_funcs {
 	void (*audio_codec_disable)(struct intel_encoder *encoder);
 	void (*fdi_link_train)(struct drm_crtc *crtc);
 	void (*init_clock_gating)(struct drm_device *dev);
+	int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc,
+			  struct drm_framebuffer *fb,
+			  struct drm_i915_gem_object *obj,
+			  struct drm_i915_gem_request *req,
+			  uint32_t flags);
 	void (*hpd_irq_setup)(struct drm_i915_private *dev_priv);
 	/* clock updates for mode set */
 	/* cursor updates */
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index fc2b2a7e2c55..caaf1e2a7bc1 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -136,12 +136,6 @@ static const u32 hpd_bxt[HPD_NUM_PINS] = {
 	POSTING_READ(type##IIR); \
 } while (0)
 
-static void
-intel_finish_page_flip_cs(struct drm_i915_private *dev_priv, unsigned pipe)
-{
-	DRM_DEBUG_KMS("Finished page flip\n");
-}
-
 /*
  * We should clear IMR at preinstall/uninstall, and just check at postinstall.
  */
@@ -1637,11 +1631,16 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
 	}
 }
 
-static void intel_pipe_handle_vblank(struct drm_i915_private *dev_priv,
+static bool intel_pipe_handle_vblank(struct drm_i915_private *dev_priv,
 				     enum pipe pipe)
 {
-	if (drm_handle_vblank(dev_priv->dev, pipe))
+	bool ret;
+
+	ret = drm_handle_vblank(dev_priv->dev, pipe);
+	if (ret)
 		intel_finish_page_flip_mmio(dev_priv, pipe);
+
+	return ret;
 }
 
 static void valleyview_pipestat_irq_ack(struct drm_i915_private *dev_priv,
@@ -1708,8 +1707,9 @@ static void valleyview_pipestat_irq_handler(struct drm_i915_private *dev_priv,
 	enum pipe pipe;
 
 	for_each_pipe(dev_priv, pipe) {
-		if (pipe_stats[pipe] & PIPE_START_VBLANK_INTERRUPT_STATUS)
-			intel_pipe_handle_vblank(dev_priv, pipe);
+		if (pipe_stats[pipe] & PIPE_START_VBLANK_INTERRUPT_STATUS &&
+		    intel_pipe_handle_vblank(dev_priv, pipe))
+			intel_check_page_flip(dev_priv, pipe);
 
 		if (pipe_stats[pipe] & PLANE_FLIP_DONE_INT_STATUS_VLV)
 			intel_finish_page_flip_cs(dev_priv, pipe);
@@ -2155,8 +2155,9 @@ static void ilk_display_irq_handler(struct drm_i915_private *dev_priv,
 		DRM_ERROR("Poison interrupt\n");
 
 	for_each_pipe(dev_priv, pipe) {
-		if (de_iir & DE_PIPE_VBLANK(pipe))
-			intel_pipe_handle_vblank(dev_priv, pipe);
+		if (de_iir & DE_PIPE_VBLANK(pipe) &&
+		    intel_pipe_handle_vblank(dev_priv, pipe))
+			intel_check_page_flip(dev_priv, pipe);
 
 		if (de_iir & DE_PIPE_FIFO_UNDERRUN(pipe))
 			intel_cpu_fifo_underrun_irq_handler(dev_priv, pipe);
@@ -2205,8 +2206,9 @@ static void ivb_display_irq_handler(struct drm_i915_private *dev_priv,
 		intel_opregion_asle_intr(dev_priv);
 
 	for_each_pipe(dev_priv, pipe) {
-		if (de_iir & (DE_PIPE_VBLANK_IVB(pipe)))
-			intel_pipe_handle_vblank(dev_priv, pipe);
+		if (de_iir & (DE_PIPE_VBLANK_IVB(pipe)) &&
+		    intel_pipe_handle_vblank(dev_priv, pipe))
+			intel_check_page_flip(dev_priv, pipe);
 
 		/* plane/pipes map 1:1 on ilk+ */
 		if (de_iir & DE_PLANE_FLIP_DONE_IVB(pipe))
@@ -2405,8 +2407,9 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl)
 		ret = IRQ_HANDLED;
 		I915_WRITE(GEN8_DE_PIPE_IIR(pipe), iir);
 
-		if (iir & GEN8_PIPE_VBLANK)
-			intel_pipe_handle_vblank(dev_priv, pipe);
+		if (iir & GEN8_PIPE_VBLANK &&
+		    intel_pipe_handle_vblank(dev_priv, pipe))
+			intel_check_page_flip(dev_priv, pipe);
 
 		flip_done = iir;
 		if (INTEL_INFO(dev_priv)->gen >= 9)
@@ -3972,6 +3975,37 @@ static int i8xx_irq_postinstall(struct drm_device *dev)
 	return 0;
 }
 
+/*
+ * Returns true when a page flip has completed.
+ */
+static bool i8xx_handle_vblank(struct drm_i915_private *dev_priv,
+			       int plane, int pipe, u32 iir)
+{
+	u16 flip_pending = DISPLAY_PLANE_FLIP_PENDING(plane);
+
+	if (!intel_pipe_handle_vblank(dev_priv, pipe))
+		return false;
+
+	if ((iir & flip_pending) == 0)
+		goto check_page_flip;
+
+	/* We detect FlipDone by looking for the change in PendingFlip from '1'
+	 * to '0' on the following vblank, i.e. IIR has the PendingFlip
+	 * asserted following the MI_DISPLAY_FLIP, but ISR is deasserted, hence
+	 * the flip is completed (no longer pending). Since this doesn't raise
+	 * an interrupt per se, we watch for the change at vblank.
+	 */
+	if (I915_READ16(ISR) & flip_pending)
+		goto check_page_flip;
+
+	intel_finish_page_flip_cs(dev_priv, pipe);
+	return true;
+
+check_page_flip:
+	intel_check_page_flip(dev_priv, pipe);
+	return false;
+}
+
 static irqreturn_t i8xx_irq_handler(int irq, void *arg)
 {
 	struct drm_device *dev = arg;
@@ -4024,8 +4058,13 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
 			notify_ring(&dev_priv->engine[RCS]);
 
 		for_each_pipe(dev_priv, pipe) {
-			if (pipe_stats[pipe] & PIPE_VBLANK_INTERRUPT_STATUS)
-				intel_pipe_handle_vblank(dev_priv, pipe);
+			int plane = pipe;
+			if (HAS_FBC(dev_priv))
+				plane = !plane;
+
+			if (pipe_stats[pipe] & PIPE_VBLANK_INTERRUPT_STATUS &&
+			    i8xx_handle_vblank(dev_priv, plane, pipe, iir))
+				flip_mask &= ~DISPLAY_PLANE_FLIP_PENDING(plane);
 
 			if (pipe_stats[pipe] & PIPE_CRC_DONE_INTERRUPT_STATUS)
 				i9xx_pipe_crc_irq_handler(dev_priv, pipe);
@@ -4125,6 +4164,37 @@ static int i915_irq_postinstall(struct drm_device *dev)
 	return 0;
 }
 
+/*
+ * Returns true when a page flip has completed.
+ */
+static bool i915_handle_vblank(struct drm_i915_private *dev_priv,
+			       int plane, int pipe, u32 iir)
+{
+	u32 flip_pending = DISPLAY_PLANE_FLIP_PENDING(plane);
+
+	if (!intel_pipe_handle_vblank(dev_priv, pipe))
+		return false;
+
+	if ((iir & flip_pending) == 0)
+		goto check_page_flip;
+
+	/* We detect FlipDone by looking for the change in PendingFlip from '1'
+	 * to '0' on the following vblank, i.e. IIR has the PendingFlip
+	 * asserted following the MI_DISPLAY_FLIP, but ISR is deasserted, hence
+	 * the flip is completed (no longer pending). Since this doesn't raise
+	 * an interrupt per se, we watch for the change at vblank.
+	 */
+	if (I915_READ(ISR) & flip_pending)
+		goto check_page_flip;
+
+	intel_finish_page_flip_cs(dev_priv, pipe);
+	return true;
+
+check_page_flip:
+	intel_check_page_flip(dev_priv, pipe);
+	return false;
+}
+
 static irqreturn_t i915_irq_handler(int irq, void *arg)
 {
 	struct drm_device *dev = arg;
@@ -4185,8 +4255,13 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
 			notify_ring(&dev_priv->engine[RCS]);
 
 		for_each_pipe(dev_priv, pipe) {
-			if (pipe_stats[pipe] & PIPE_VBLANK_INTERRUPT_STATUS)
-				intel_pipe_handle_vblank(dev_priv, pipe);
+			int plane = pipe;
+			if (HAS_FBC(dev_priv))
+				plane = !plane;
+
+			if (pipe_stats[pipe] & PIPE_VBLANK_INTERRUPT_STATUS &&
+			    i915_handle_vblank(dev_priv, plane, pipe, iir))
+				flip_mask &= ~DISPLAY_PLANE_FLIP_PENDING(plane);
 
 			if (pipe_stats[pipe] & PIPE_LEGACY_BLC_EVENT_STATUS)
 				blc_event = true;
@@ -4414,8 +4489,9 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
 			notify_ring(&dev_priv->engine[VCS]);
 
 		for_each_pipe(dev_priv, pipe) {
-			if (pipe_stats[pipe] & PIPE_START_VBLANK_INTERRUPT_STATUS)
-				intel_pipe_handle_vblank(dev_priv, pipe);
+			if (pipe_stats[pipe] & PIPE_START_VBLANK_INTERRUPT_STATUS &&
+			    i915_handle_vblank(dev_priv, pipe, pipe, iir))
+				flip_mask &= ~DISPLAY_PLANE_FLIP_PENDING(pipe);
 
 			if (pipe_stats[pipe] & PIPE_LEGACY_BLC_EVENT_STATUS)
 				blc_event = true;
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 9a5d58b251f5..5e18cf9f754d 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -49,6 +49,7 @@ struct i915_params i915 __read_mostly = {
 	.invert_brightness = 0,
 	.disable_display = 0,
 	.enable_cmd_parser = 1,
+	.use_mmio_flip = 0,
 	.mmio_debug = 0,
 	.verbose_state_checks = 1,
 	.nuclear_pageflip = 0,
@@ -174,6 +175,10 @@ module_param_named_unsafe(enable_cmd_parser, i915.enable_cmd_parser, int, 0600);
 MODULE_PARM_DESC(enable_cmd_parser,
 		 "Enable command parsing (1=enabled [default], 0=disabled)");
 
+module_param_named_unsafe(use_mmio_flip, i915.use_mmio_flip, int, 0600);
+MODULE_PARM_DESC(use_mmio_flip,
+		 "use MMIO flips (-1=never, 0=driver discretion [default], 1=always)");
+
 module_param_named(mmio_debug, i915.mmio_debug, int, 0600);
 MODULE_PARM_DESC(mmio_debug,
 	"Enable the MMIO debug code for the first N failures (default: off). "
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 658ce7379671..1323261a0cdd 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -48,6 +48,7 @@ struct i915_params {
 	int enable_guc_loading;
 	int enable_guc_submission;
 	int guc_log_level;
+	int use_mmio_flip;
 	int mmio_debug;
 	int edp_vswing;
 	unsigned int inject_load_failure;
diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c
index b4927f6bbeac..50ff90aea721 100644
--- a/drivers/gpu/drm/i915/intel_atomic.c
+++ b/drivers/gpu/drm/i915/intel_atomic.c
@@ -311,17 +311,6 @@ intel_atomic_state_alloc(struct drm_device *dev)
 void intel_atomic_state_clear(struct drm_atomic_state *s)
 {
 	struct intel_atomic_state *state = to_intel_atomic_state(s);
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(state->work); i++) {
-		struct intel_flip_work *work = state->work[i];
-
-		if (work)
-			intel_free_flip_work(work);
-
-		state->work[i] = NULL;
-	}
-
 	drm_atomic_state_default_clear(&state->base);
 	state->dpll_set = state->modeset = false;
 }
diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
index 2ab45f16fa65..7de7721f65bc 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
@@ -102,7 +102,6 @@ intel_plane_destroy_state(struct drm_plane *plane,
 			  struct drm_plane_state *state)
 {
 	WARN_ON(state && to_intel_plane_state(state)->wait_req);
-	WARN_ON(state && state->fence);
 	drm_atomic_helper_plane_destroy_state(plane, state);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index fd171fd2b255..46429e73e058 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -48,6 +48,11 @@
 #include <linux/reservation.h>
 #include <linux/dma-buf.h>
 
+static bool is_mmio_work(struct intel_flip_work *work)
+{
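+	/* The work struct is kzalloc'ed, so mmio_work.func is non-NULL
+	 * only after the MMIO path has run INIT_WORK() on it; the
+	 * callback pointer doubles as the CS-vs-MMIO discriminator.
+	 */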
+	return work->mmio_work.func;
+}
+
 /* Primary plane formats for gen <= 3 */
 static const uint32_t i8xx_primary_formats[] = {
 	DRM_FORMAT_C8,
@@ -108,6 +113,8 @@ static void vlv_prepare_pll(struct intel_crtc *crtc,
 			    const struct intel_crtc_state *pipe_config);
 static void chv_prepare_pll(struct intel_crtc *crtc,
 			    const struct intel_crtc_state *pipe_config);
+static void intel_begin_crtc_commit(struct drm_crtc *, struct drm_crtc_state *);
+static void intel_finish_crtc_commit(struct drm_crtc *, struct drm_crtc_state *);
 static void skl_init_scalers(struct drm_device *dev, struct intel_crtc *intel_crtc,
 	struct intel_crtc_state *crtc_state);
 static void skylake_pfit_enable(struct intel_crtc *crtc);
@@ -116,9 +123,6 @@ static void ironlake_pfit_enable(struct intel_crtc *crtc);
 static void intel_modeset_setup_hw_state(struct drm_device *dev);
 static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc);
 static int ilk_max_pixel_rate(struct drm_atomic_state *state);
-static void intel_modeset_verify_crtc(struct drm_crtc *crtc,
-				      struct drm_crtc_state *old_state,
-				      struct drm_crtc_state *new_state);
 static int broxton_calc_cdclk(int max_pixclk);
 
 struct intel_limit {
@@ -2524,6 +2528,20 @@ out_unref_obj:
 	return false;
 }
 
+/* Update plane->state->fb to match plane->fb after driver-internal updates */
+static void
+update_state_fb(struct drm_plane *plane)
+{
+	if (plane->fb == plane->state->fb)
+		return;
+
+	if (plane->state->fb)
+		drm_framebuffer_unreference(plane->state->fb);
+	plane->state->fb = plane->fb;
+	if (plane->state->fb)
+		drm_framebuffer_reference(plane->state->fb);
+}
+
 static void
 intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 			     struct intel_initial_plane_config *plane_config)
@@ -3096,6 +3114,14 @@ intel_pipe_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 	return -ENODEV;
 }
 
+static void intel_complete_page_flips(struct drm_i915_private *dev_priv)
+{
+	struct intel_crtc *crtc;
+
+	for_each_intel_crtc(dev_priv->dev, crtc)
+		intel_finish_page_flip_cs(dev_priv, crtc->pipe);
+}
+
 static void intel_update_primary_planes(struct drm_device *dev)
 {
 	struct drm_crtc *crtc;
@@ -3136,6 +3162,13 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv)
 
 void intel_finish_reset(struct drm_i915_private *dev_priv)
 {
+	/*
+	 * Flips in the rings will be nuked by the reset,
+	 * so complete all pending flips so that user space
+	 * will get its events and not get stuck.
+	 */
+	intel_complete_page_flips(dev_priv);
+
 	/* no reset support for gen2 */
 	if (IS_GEN2(dev_priv))
 		return;
@@ -3178,7 +3211,20 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
 
 static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
 {
-	return !list_empty_careful(&to_intel_crtc(crtc)->flip_work);
+	struct drm_device *dev = crtc->dev;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	unsigned reset_counter;
+	bool pending;
+
+	reset_counter = i915_reset_counter(&to_i915(dev)->gpu_error);
+	if (intel_crtc->reset_counter != reset_counter)
+		return false;
+
+	spin_lock_irq(&dev->event_lock);
+	pending = to_intel_crtc(crtc)->flip_work != NULL;
+	spin_unlock_irq(&dev->event_lock);
+
+	return pending;
 }
 
 static void intel_update_pipe_config(struct intel_crtc *crtc,
@@ -3754,7 +3800,7 @@ bool intel_has_pending_fb_unpin(struct drm_device *dev)
 		if (atomic_read(&crtc->unpin_work_count) == 0)
 			continue;
 
-		if (!list_empty_careful(&crtc->flip_work))
+		if (crtc->flip_work)
 			intel_wait_for_vblank(dev, crtc->pipe);
 
 		return true;
@@ -3763,30 +3809,23 @@ bool intel_has_pending_fb_unpin(struct drm_device *dev)
 	return false;
 }
 
-static void page_flip_completed(struct intel_crtc *intel_crtc, struct intel_flip_work *work)
+static void page_flip_completed(struct intel_crtc *intel_crtc)
 {
 	struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
-	struct drm_plane_state *new_plane_state;
-	struct drm_plane *primary = intel_crtc->base.primary;
+	struct intel_flip_work *work = intel_crtc->flip_work;
+
+	intel_crtc->flip_work = NULL;
 
 	if (work->event)
 		drm_crtc_send_vblank_event(&intel_crtc->base, work->event);
 
 	drm_crtc_vblank_put(&intel_crtc->base);
 
-	new_plane_state = &work->old_plane_state[0]->base;
-	if (work->num_planes >= 1 &&
-	    new_plane_state->plane == primary &&
-	    new_plane_state->fb)
-		trace_i915_flip_complete(intel_crtc->plane,
-					 intel_fb_obj(new_plane_state->fb));
-
-	if (work->can_async_unpin) {
-		list_del_init(&work->head);
-		wake_up_all(&dev_priv->pending_flip_queue);
-	}
-
+	wake_up_all(&dev_priv->pending_flip_queue);
 	queue_work(dev_priv->wq, &work->unpin_work);
+
+	trace_i915_flip_complete(intel_crtc->plane,
+				 work->pending_flip_obj);
 }
 
 static int intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc)
@@ -3805,7 +3844,18 @@ static int intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc)
 	if (ret < 0)
 		return ret;
 
-	WARN(ret == 0, "Stuck page flip\n");
+	if (ret == 0) {
+		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+		struct intel_flip_work *work;
+
+		spin_lock_irq(&dev->event_lock);
+		work = intel_crtc->flip_work;
+		if (work && !is_mmio_work(work)) {
+			WARN_ONCE(1, "Removing stuck page flip\n");
+			page_flip_completed(intel_crtc);
+		}
+		spin_unlock_irq(&dev->event_lock);
+	}
 
 	return 0;
 }
@@ -4536,6 +4586,39 @@ intel_pre_disable_primary_noatomic(struct drm_crtc *crtc)
 	}
 }
 
+static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
+	struct drm_atomic_state *old_state = old_crtc_state->base.state;
+	struct intel_crtc_state *pipe_config =
+		to_intel_crtc_state(crtc->base.state);
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_plane *primary = crtc->base.primary;
+	struct drm_plane_state *old_pri_state =
+		drm_atomic_get_existing_plane_state(old_state, primary);
+
+	intel_frontbuffer_flip(dev, pipe_config->fb_bits);
+
+	crtc->wm.cxsr_allowed = true;
+
+	if (pipe_config->update_wm_post && pipe_config->base.active)
+		intel_update_watermarks(&crtc->base);
+
+	if (old_pri_state) {
+		struct intel_plane_state *primary_state =
+			to_intel_plane_state(primary->state);
+		struct intel_plane_state *old_primary_state =
+			to_intel_plane_state(old_pri_state);
+
+		intel_fbc_post_update(crtc);
+
+		if (primary_state->visible &&
+		    (needs_modeset(&pipe_config->base) ||
+		     !old_primary_state->visible))
+			intel_post_enable_primary(&crtc->base);
+	}
+}
+
 static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
@@ -4555,7 +4638,7 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state)
 		struct intel_plane_state *old_primary_state =
 			to_intel_plane_state(old_pri_state);
 
-		intel_fbc_pre_update(crtc, pipe_config, primary_state);
+		intel_fbc_pre_update(crtc);
 
 		if (old_primary_state->visible &&
 		    (modeset || !primary_state->visible))
@@ -5145,21 +5228,18 @@ modeset_get_crtc_power_domains(struct drm_crtc *crtc,
 	struct drm_i915_private *dev_priv = crtc->dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	enum intel_display_power_domain domain;
-	unsigned long domains, new_domains, old_domains, ms_domain = 0;
+	unsigned long domains, new_domains, old_domains;
 
 	old_domains = intel_crtc->enabled_power_domains;
 	intel_crtc->enabled_power_domains = new_domains =
 		get_crtc_power_domains(crtc, crtc_state);
 
-	if (needs_modeset(&crtc_state->base))
-		ms_domain = BIT(POWER_DOMAIN_MODESET);
-
-	domains = (new_domains & ~old_domains) | ms_domain;
+	domains = new_domains & ~old_domains;
 
 	for_each_power_domain(domain, domains)
 		intel_display_power_get(dev_priv, domain);
 
-	return (old_domains & ~new_domains) | ms_domain;
+	return old_domains & ~new_domains;
 }
 
 static void modeset_put_power_domains(struct drm_i915_private *dev_priv,
@@ -6193,7 +6273,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
 		return;
 
 	if (to_intel_plane_state(crtc->primary->state)->visible) {
-		WARN_ON(list_empty(&intel_crtc->flip_work));
+		WARN_ON(intel_crtc->flip_work);
 
 		intel_pre_disable_primary_noatomic(crtc);
 
@@ -6245,12 +6325,6 @@ int intel_display_suspend(struct drm_device *dev)
 		DRM_ERROR("Suspending crtc's failed with %i\n", ret);
 	else
 		dev_priv->modeset_restore_state = state;
-
-	/*
-	 * Make sure all unpin_work completes before returning.
-	 */
-	flush_workqueue(dev_priv->wq);
-
 	return ret;
 }
 
@@ -6264,10 +6338,9 @@ void intel_encoder_destroy(struct drm_encoder *encoder)
 
 /* Cross check the actual hw state with our own modeset state tracking (and its
  * internal consistency). */
-static void intel_connector_verify_state(struct intel_connector *connector,
-					 struct drm_connector_state *conn_state)
+static void intel_connector_verify_state(struct intel_connector *connector)
 {
-	struct drm_crtc *crtc = conn_state->crtc;
+	struct drm_crtc *crtc = connector->base.state->crtc;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
 		      connector->base.base.id,
@@ -6275,6 +6348,7 @@ static void intel_connector_verify_state(struct intel_connector *connector,
 
 	if (connector->get_hw_state(connector)) {
 		struct intel_encoder *encoder = connector->encoder;
+		struct drm_connector_state *conn_state = connector->base.state;
 
 		I915_STATE_WARN(!crtc,
 			 "connector enabled without attached crtc\n");
@@ -6296,7 +6370,7 @@ static void intel_connector_verify_state(struct intel_connector *connector,
 	} else {
 		I915_STATE_WARN(crtc && crtc->state->active,
 			"attached crtc is active, but connector isn't\n");
-		I915_STATE_WARN(!crtc && conn_state->best_encoder,
+		I915_STATE_WARN(!crtc && connector->base.state->best_encoder,
 			"best encoder set without crtc!\n");
 	}
 }
@@ -10854,13 +10928,6 @@ void intel_mark_idle(struct drm_i915_private *dev_priv)
 	intel_runtime_pm_put(dev_priv);
 }
 
-void intel_free_flip_work(struct intel_flip_work *work)
-{
-	kfree(work->old_connector_state);
-	kfree(work->new_connector_state);
-	kfree(work);
-}
-
 static void intel_crtc_destroy(struct drm_crtc *crtc)
 {
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -10868,278 +10935,885 @@ static void intel_crtc_destroy(struct drm_crtc *crtc)
 	struct intel_flip_work *work;
 
 	spin_lock_irq(&dev->event_lock);
-	while (!list_empty(&intel_crtc->flip_work)) {
-		work = list_first_entry(&intel_crtc->flip_work,
-					struct intel_flip_work, head);
-		list_del_init(&work->head);
-		spin_unlock_irq(&dev->event_lock);
+	work = intel_crtc->flip_work;
+	intel_crtc->flip_work = NULL;
+	spin_unlock_irq(&dev->event_lock);
 
+	if (work) {
 		cancel_work_sync(&work->mmio_work);
 		cancel_work_sync(&work->unpin_work);
-		intel_free_flip_work(work);
-
-		spin_lock_irq(&dev->event_lock);
+		kfree(work);
 	}
-	spin_unlock_irq(&dev->event_lock);
 
 	drm_crtc_cleanup(crtc);
 
 	kfree(intel_crtc);
 }
 
-static void intel_crtc_post_flip_update(struct intel_flip_work *work,
-					struct drm_crtc *crtc)
+static void intel_unpin_work_fn(struct work_struct *__work)
 {
-	struct intel_crtc_state *crtc_state = work->new_crtc_state;
+	struct intel_flip_work *work =
+		container_of(__work, struct intel_flip_work, unpin_work);
+	struct intel_crtc *crtc = to_intel_crtc(work->crtc);
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_plane *primary = crtc->base.primary;
+
+	if (is_mmio_work(work))
+		flush_work(&work->mmio_work);
+
+	mutex_lock(&dev->struct_mutex);
+	intel_unpin_fb_obj(work->old_fb, primary->state->rotation);
+	drm_gem_object_unreference(&work->pending_flip_obj->base);
+
+	if (work->flip_queued_req)
+		i915_gem_request_assign(&work->flip_queued_req, NULL);
+	mutex_unlock(&dev->struct_mutex);
+
+	intel_frontbuffer_flip_complete(dev, to_intel_plane(primary)->frontbuffer_bit);
+	intel_fbc_post_update(crtc);
+	drm_framebuffer_unreference(work->old_fb);
+
+	BUG_ON(atomic_read(&crtc->unpin_work_count) == 0);
+	atomic_dec(&crtc->unpin_work_count);
+
+	kfree(work);
+}
+
+/* Is 'a' after or equal to 'b'? */
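+/* Relies on unsigned wraparound: e.g. a = 0x2, b = 0xfffffffe gives
+ * a - b = 4, so bit 31 is clear and 'a' still counts as after 'b'
+ * even though it is numerically smaller.
+ */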
+static bool g4x_flip_count_after_eq(u32 a, u32 b)
+{
+	return !((a - b) & 0x80000000);
+}
+
+static bool __pageflip_finished_cs(struct intel_crtc *crtc,
+				   struct intel_flip_work *work)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	unsigned reset_counter;
+
+	reset_counter = i915_reset_counter(&dev_priv->gpu_error);
+	if (crtc->reset_counter != reset_counter)
+		return true;
+
+	/*
+	 * The relevant registers don't exist on pre-ctg.
+	 * As the flip done interrupt doesn't trigger for mmio
+	 * flips on gmch platforms, a flip count check isn't
+	 * really needed there. But since ctg has the registers,
+	 * include it in the check anyway.
+	 */
+	if (INTEL_INFO(dev)->gen < 5 && !IS_G4X(dev))
+		return true;
+
+	/*
+	 * BDW signals flip done immediately if the plane
+	 * is disabled, even if the plane enable is already
+	 * armed to occur at the next vblank :(
+	 */
+
+	/*
+	 * A DSPSURFLIVE check isn't enough in case the mmio and CS flips
+	 * used the same base address. In that case the mmio flip might
+	 * have completed, but the CS hasn't even executed the flip yet.
+	 *
+	 * A flip count check isn't enough as the CS might have updated
+	 * the base address just after start of vblank, but before we
+	 * managed to process the interrupt. This means we'd complete the
+	 * CS flip too soon.
+	 *
+	 * Combining both checks should get us a good enough result. It may
+	 * still happen that the CS flip has been executed, but has not
+	 * yet actually completed. But in case the base address is the same
+	 * anyway, we don't really care.
+	 */
+	return (I915_READ(DSPSURFLIVE(crtc->plane)) & ~0xfff) ==
+		crtc->flip_work->gtt_offset &&
+		g4x_flip_count_after_eq(I915_READ(PIPE_FLIPCOUNT_G4X(crtc->pipe)),
+				    crtc->flip_work->flip_count);
+}
+
+static bool
+__pageflip_finished_mmio(struct intel_crtc *crtc,
+			       struct intel_flip_work *work)
+{
+	/*
+	 * MMIO work completes when vblank is different from
+	 * flip_queued_vblank.
+	 *
+	 * Reset counter value doesn't matter; this is handled by
+	 * i915_wait_request finishing early, so no need to handle
+	 * reset here.
+	 */
+	return intel_crtc_get_vblank_counter(crtc) != work->flip_queued_vblank;
+}
+
+
+static bool pageflip_finished(struct intel_crtc *crtc,
+			      struct intel_flip_work *work)
+{
+	if (!atomic_read(&work->pending))
+		return false;
+
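+	/* Pairs with the barrier before atomic_set(&work->pending, 1),
+	 * e.g. smp_mb__before_atomic() in intel_mark_page_flip_active():
+	 * once 'pending' reads non-zero, the rest of the work item is
+	 * guaranteed visible.
+	 */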
+	smp_rmb();
+
+	if (is_mmio_work(work))
+		return __pageflip_finished_mmio(crtc, work);
+	else
+		return __pageflip_finished_cs(crtc, work);
+}
+
+void intel_finish_page_flip_cs(struct drm_i915_private *dev_priv, int pipe)
+{
+	struct drm_device *dev = dev_priv->dev;
+	struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_flip_work *work;
+	unsigned long flags;
 
-	if (crtc_state->disable_cxsr)
-		intel_crtc->wm.cxsr_allowed = true;
+	/* Ignore early vblank irqs */
+	if (!crtc)
+		return;
 
-	if (crtc_state->update_wm_post && crtc_state->base.active)
-		intel_update_watermarks(crtc);
+	/*
+	 * This is called both by irq handlers and the reset code (to complete
+	 * lost pageflips) so needs the full irqsave spinlocks.
+	 */
+	spin_lock_irqsave(&dev->event_lock, flags);
+	work = intel_crtc->flip_work;
 
-	if (work->num_planes > 0 &&
-	    work->old_plane_state[0]->base.plane == crtc->primary) {
-		struct intel_plane_state *plane_state =
-			work->new_plane_state[0];
+	if (work != NULL &&
+	    !is_mmio_work(work) &&
+	    pageflip_finished(intel_crtc, work))
+		page_flip_completed(intel_crtc);
 
-		if (plane_state->visible &&
-		    (needs_modeset(&crtc_state->base) ||
-		     !work->old_plane_state[0]->visible))
-			intel_post_enable_primary(crtc);
-	}
+	spin_unlock_irqrestore(&dev->event_lock, flags);
 }
 
-static void intel_unpin_work_fn(struct work_struct *__work)
+void intel_finish_page_flip_mmio(struct drm_i915_private *dev_priv, int pipe)
 {
-	struct intel_flip_work *work =
-		container_of(__work, struct intel_flip_work, unpin_work);
-	struct drm_crtc *crtc = work->old_crtc_state->base.crtc;
+	struct drm_device *dev = dev_priv->dev;
+	struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int i;
+	struct intel_flip_work *work;
+	unsigned long flags;
 
-	if (work->fb_bits)
-		intel_frontbuffer_flip_complete(dev, work->fb_bits);
+	/* Ignore early vblank irqs */
+	if (!crtc)
+		return;
 
 	/*
-	 * Unless work->can_async_unpin is false, there's no way to ensure
-	 * that work->new_crtc_state contains valid memory during unpin
-	 * because intel_atomic_commit may free it before this runs.
+	 * This is called both by irq handlers and the reset code (to complete
+	 * lost pageflips) so needs the full irqsave spinlocks.
 	 */
-	if (!work->can_async_unpin) {
-		intel_crtc_post_flip_update(work, crtc);
+	spin_lock_irqsave(&dev->event_lock, flags);
+	work = intel_crtc->flip_work;
 
-		if (dev_priv->display.optimize_watermarks)
-			dev_priv->display.optimize_watermarks(work->new_crtc_state);
-	}
+	if (work != NULL &&
+	    is_mmio_work(work) &&
+	    pageflip_finished(intel_crtc, work))
+		page_flip_completed(intel_crtc);
+
+	spin_unlock_irqrestore(&dev->event_lock, flags);
+}
+
+static inline void intel_mark_page_flip_active(struct intel_crtc *crtc,
+					       struct intel_flip_work *work)
+{
+	work->flip_queued_vblank = intel_crtc_get_vblank_counter(crtc);
 
-	if (work->fb_bits & to_intel_plane(crtc->primary)->frontbuffer_bit)
-		intel_fbc_post_update(intel_crtc);
+	/* Ensure that the work item is consistent when activating it ... */
+	smp_mb__before_atomic();
+	atomic_set(&work->pending, 1);
+}
 
-	if (work->put_power_domains)
-		modeset_put_power_domains(dev_priv, work->put_power_domains);
+static int intel_gen2_queue_flip(struct drm_device *dev,
+				 struct drm_crtc *crtc,
+				 struct drm_framebuffer *fb,
+				 struct drm_i915_gem_object *obj,
+				 struct drm_i915_gem_request *req,
+				 uint32_t flags)
+{
+	struct intel_engine_cs *engine = req->engine;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	u32 flip_mask;
+	int ret;
 
-	/* Make sure mmio work is completely finished before freeing all state here. */
-	flush_work(&work->mmio_work);
+	ret = intel_ring_begin(req, 6);
+	if (ret)
+		return ret;
 
-	if (!work->can_async_unpin &&
-	    (work->new_crtc_state->update_pipe ||
-	     needs_modeset(&work->new_crtc_state->base))) {
-		/* This must be called before work is unpinned for serialization. */
-		intel_modeset_verify_crtc(crtc, &work->old_crtc_state->base,
-					  &work->new_crtc_state->base);
+	/* Can't queue multiple flips, so wait for the previous
+	 * one to finish before executing the next.
+	 */
+	if (intel_crtc->plane)
+		flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
+	else
+		flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
+	intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask);
+	intel_ring_emit(engine, MI_NOOP);
+	intel_ring_emit(engine, MI_DISPLAY_FLIP |
+			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
+	intel_ring_emit(engine, fb->pitches[0]);
+	intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset);
+	intel_ring_emit(engine, 0); /* aux display base address, unused */
 
-		for (i = 0; i < work->num_new_connectors; i++) {
-			struct drm_connector_state *conn_state =
-				work->new_connector_state[i];
-			struct drm_connector *con = conn_state->connector;
+	return 0;
+}
 
-			WARN_ON(!con);
+static int intel_gen3_queue_flip(struct drm_device *dev,
+				 struct drm_crtc *crtc,
+				 struct drm_framebuffer *fb,
+				 struct drm_i915_gem_object *obj,
+				 struct drm_i915_gem_request *req,
+				 uint32_t flags)
+{
+	struct intel_engine_cs *engine = req->engine;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	u32 flip_mask;
+	int ret;
 
-			intel_connector_verify_state(to_intel_connector(con),
-						     conn_state);
-		}
-	}
+	ret = intel_ring_begin(req, 6);
+	if (ret)
+		return ret;
 
-	for (i = 0; i < work->num_old_connectors; i++) {
-		struct drm_connector_state *old_con_state =
-			work->old_connector_state[i];
-		struct drm_connector *con =
-			old_con_state->connector;
+	if (intel_crtc->plane)
+		flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
+	else
+		flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
+	intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask);
+	intel_ring_emit(engine, MI_NOOP);
+	intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 |
+			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
+	intel_ring_emit(engine, fb->pitches[0]);
+	intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset);
+	intel_ring_emit(engine, MI_NOOP);
 
-		con->funcs->atomic_destroy_state(con, old_con_state);
+	return 0;
+}
+
+static int intel_gen4_queue_flip(struct drm_device *dev,
+				 struct drm_crtc *crtc,
+				 struct drm_framebuffer *fb,
+				 struct drm_i915_gem_object *obj,
+				 struct drm_i915_gem_request *req,
+				 uint32_t flags)
+{
+	struct intel_engine_cs *engine = req->engine;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	uint32_t pf, pipesrc;
+	int ret;
+
+	ret = intel_ring_begin(req, 4);
+	if (ret)
+		return ret;
+
+	/* i965+ uses the linear or tiled offsets from the
+	 * Display Registers (which do not change across a page-flip)
+	 * so we need only reprogram the base address.
+	 */
+	intel_ring_emit(engine, MI_DISPLAY_FLIP |
+			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
+	intel_ring_emit(engine, fb->pitches[0]);
+	intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset |
+			obj->tiling_mode);
+
+	/* XXX Enabling the panel-fitter across page-flip is so far
+	 * untested on non-native modes, so ignore it for now.
+	 * pf = I915_READ(pipe == 0 ? PFA_CTL_1 : PFB_CTL_1) & PF_ENABLE;
+	 */
+	pf = 0;
+	pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
+	intel_ring_emit(engine, pf | pipesrc);
+
+	return 0;
+}
+
+static int intel_gen6_queue_flip(struct drm_device *dev,
+				 struct drm_crtc *crtc,
+				 struct drm_framebuffer *fb,
+				 struct drm_i915_gem_object *obj,
+				 struct drm_i915_gem_request *req,
+				 uint32_t flags)
+{
+	struct intel_engine_cs *engine = req->engine;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	uint32_t pf, pipesrc;
+	int ret;
+
+	ret = intel_ring_begin(req, 4);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(engine, MI_DISPLAY_FLIP |
+			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
+	intel_ring_emit(engine, fb->pitches[0] | obj->tiling_mode);
+	intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset);
+
+	/* Contrary to the suggestions in the documentation,
+	 * "Enable Panel Fitter" does not seem to be required when page
+	 * flipping with a non-native mode, and worse causes a normal
+	 * modeset to fail.
+	 * pf = I915_READ(PF_CTL(intel_crtc->pipe)) & PF_ENABLE;
+	 */
+	pf = 0;
+	pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
+	intel_ring_emit(engine, pf | pipesrc);
+
+	return 0;
+}
+
+static int intel_gen7_queue_flip(struct drm_device *dev,
+				 struct drm_crtc *crtc,
+				 struct drm_framebuffer *fb,
+				 struct drm_i915_gem_object *obj,
+				 struct drm_i915_gem_request *req,
+				 uint32_t flags)
+{
+	struct intel_engine_cs *engine = req->engine;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	uint32_t plane_bit = 0;
+	int len, ret;
+
+	switch (intel_crtc->plane) {
+	case PLANE_A:
+		plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_A;
+		break;
+	case PLANE_B:
+		plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_B;
+		break;
+	case PLANE_C:
+		plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_C;
+		break;
+	default:
+		WARN_ONCE(1, "unknown plane in flip command\n");
+		return -ENODEV;
 	}
 
-	if (!work->can_async_unpin || !list_empty(&work->head)) {
-		spin_lock_irq(&dev->event_lock);
-		WARN(list_empty(&work->head) != work->can_async_unpin,
-		     "[CRTC:%i] Pin work %p async %i with %i planes, active %i -> %i ms %i\n",
-		     crtc->base.id, work, work->can_async_unpin, work->num_planes,
-		     work->old_crtc_state->base.active, work->new_crtc_state->base.active,
-		     needs_modeset(&work->new_crtc_state->base));
+	len = 4;
+	if (engine->id == RCS) {
+		len += 6;
+		/*
+		 * On Gen 8, SRM is now taking an extra dword to accommodate
+		 * 48-bit addresses, and we need a NOOP for the batch size to
+		 * stay even.
+		 */
+		if (IS_GEN8(dev))
+			len += 2;
+	}
 
-		if (!list_empty(&work->head))
-			list_del(&work->head);
+	/*
+	 * BSpec MI_DISPLAY_FLIP for IVB:
+	 * "The full packet must be contained within the same cache line."
+	 *
+	 * Currently the LRI+SRM+MI_DISPLAY_FLIP all fit within the same
+	 * cacheline, if we ever start emitting more commands before
+	 * the MI_DISPLAY_FLIP we may need to first emit everything else,
+	 * then do the cacheline alignment, and finally emit the
+	 * MI_DISPLAY_FLIP.
+	 */
+	ret = intel_ring_cacheline_align(req);
+	if (ret)
+		return ret;
 
-		wake_up_all(&dev_priv->pending_flip_queue);
-		spin_unlock_irq(&dev->event_lock);
+	ret = intel_ring_begin(req, len);
+	if (ret)
+		return ret;
+
+	/* Unmask the flip-done completion message. Note that the bspec says that
+	 * we should do this for both the BCS and RCS, and that we must not unmask
+	 * more than one flip event at any time (or ensure that one flip message
+	 * can be sent by waiting for flip-done prior to queueing new flips).
+	 * Experimentation says that BCS works despite DERRMR masking all
+	 * flip-done completion events and that unmasking all planes at once
+	 * for the RCS also doesn't appear to drop events. Setting the DERRMR
+	 * to zero does lead to lockups within MI_DISPLAY_FLIP.
+	 */
+	if (engine->id == RCS) {
+		intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
+		intel_ring_emit_reg(engine, DERRMR);
+		intel_ring_emit(engine, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
+					  DERRMR_PIPEB_PRI_FLIP_DONE |
+					  DERRMR_PIPEC_PRI_FLIP_DONE));
+		if (IS_GEN8(dev))
+			intel_ring_emit(engine, MI_STORE_REGISTER_MEM_GEN8 |
+					      MI_SRM_LRM_GLOBAL_GTT);
+		else
+			intel_ring_emit(engine, MI_STORE_REGISTER_MEM |
+					      MI_SRM_LRM_GLOBAL_GTT);
+		intel_ring_emit_reg(engine, DERRMR);
+		intel_ring_emit(engine, engine->scratch.gtt_offset + 256);
+		if (IS_GEN8(dev)) {
+			intel_ring_emit(engine, 0);
+			intel_ring_emit(engine, MI_NOOP);
+		}
 	}
 
-	/* New crtc_state freed? */
-	if (work->free_new_crtc_state)
-		intel_crtc_destroy_state(crtc, &work->new_crtc_state->base);
+	intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 | plane_bit);
+	intel_ring_emit(engine, (fb->pitches[0] | obj->tiling_mode));
+	intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset);
+	intel_ring_emit(engine, (MI_NOOP));
 
-	intel_crtc_destroy_state(crtc, &work->old_crtc_state->base);
+	return 0;
+}
 
-	for (i = 0; i < work->num_planes; i++) {
-		struct intel_plane_state *old_plane_state =
-			work->old_plane_state[i];
-		struct drm_framebuffer *old_fb = old_plane_state->base.fb;
-		struct drm_plane *plane = old_plane_state->base.plane;
-		struct drm_i915_gem_request *req;
+static bool use_mmio_flip(struct intel_engine_cs *engine,
+			  struct drm_i915_gem_object *obj)
+{
+	/*
+	 * This is not being used for older platforms, because
+	 * non-availability of flip done interrupt forces us to use
+	 * CS flips. Older platforms derive flip done using some clever
+	 * tricks involving the flip_pending status bits and vblank irqs.
+	 * So using MMIO flips there would disrupt this mechanism.
+	 */
 
-		req = old_plane_state->wait_req;
-		old_plane_state->wait_req = NULL;
-		if (req)
-			i915_gem_request_unreference(req);
+	if (engine == NULL)
+		return true;
 
-		fence_put(old_plane_state->base.fence);
-		old_plane_state->base.fence = NULL;
+	if (INTEL_GEN(engine->i915) < 5)
+		return false;
 
-		if (old_fb &&
-		    (plane->type != DRM_PLANE_TYPE_CURSOR ||
-		     !INTEL_INFO(dev_priv)->cursor_needs_physical)) {
-			mutex_lock(&dev->struct_mutex);
-			intel_unpin_fb_obj(old_fb, old_plane_state->base.rotation);
-			mutex_unlock(&dev->struct_mutex);
-		}
+	if (i915.use_mmio_flip < 0)
+		return false;
+	else if (i915.use_mmio_flip > 0)
+		return true;
+	else if (i915.enable_execlists)
+		return true;
+	else if (obj->base.dma_buf &&
+		 !reservation_object_test_signaled_rcu(obj->base.dma_buf->resv,
+						       false))
+		return true;
+	else
+		return engine != i915_gem_request_get_engine(obj->last_write_req);
+}
+
+static void skl_do_mmio_flip(struct intel_crtc *intel_crtc,
+			     unsigned int rotation,
+			     struct intel_flip_work *work)
+{
+	struct drm_device *dev = intel_crtc->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_framebuffer *fb = intel_crtc->base.primary->fb;
+	const enum pipe pipe = intel_crtc->pipe;
+	u32 ctl, stride, tile_height;
+
+	ctl = I915_READ(PLANE_CTL(pipe, 0));
+	ctl &= ~PLANE_CTL_TILED_MASK;
+	switch (fb->modifier[0]) {
+	case DRM_FORMAT_MOD_NONE:
+		break;
+	case I915_FORMAT_MOD_X_TILED:
+		ctl |= PLANE_CTL_TILED_X;
+		break;
+	case I915_FORMAT_MOD_Y_TILED:
+		ctl |= PLANE_CTL_TILED_Y;
+		break;
+	case I915_FORMAT_MOD_Yf_TILED:
+		ctl |= PLANE_CTL_TILED_YF;
+		break;
+	default:
+		MISSING_CASE(fb->modifier[0]);
+	}
 
-		intel_plane_destroy_state(plane, &old_plane_state->base);
+	/*
+	 * The stride is either expressed as a multiple of 64 bytes chunks for
+	 * linear buffers or in number of tiles for tiled buffers.
+	 */
+	if (intel_rotation_90_or_270(rotation)) {
+		/* stride = Surface height in tiles */
+		tile_height = intel_tile_height(dev_priv, fb->modifier[0], 0);
+		stride = DIV_ROUND_UP(fb->height, tile_height);
+	} else {
+		stride = fb->pitches[0] /
+			intel_fb_stride_alignment(dev_priv, fb->modifier[0],
+						  fb->pixel_format);
 	}
 
-	if (!WARN_ON(atomic_read(&intel_crtc->unpin_work_count) == 0))
-		atomic_dec(&intel_crtc->unpin_work_count);
+	/*
+	 * Both PLANE_CTL and PLANE_STRIDE are not updated on vblank but on
+	 * PLANE_SURF updates; the update is then guaranteed to be atomic.
+	 */
+	I915_WRITE(PLANE_CTL(pipe, 0), ctl);
+	I915_WRITE(PLANE_STRIDE(pipe, 0), stride);
 
-	intel_free_flip_work(work);
+	I915_WRITE(PLANE_SURF(pipe, 0), work->gtt_offset);
+	POSTING_READ(PLANE_SURF(pipe, 0));
 }
 
+static void ilk_do_mmio_flip(struct intel_crtc *intel_crtc,
+			     struct intel_flip_work *work)
+{
+	struct drm_device *dev = intel_crtc->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_framebuffer *intel_fb =
+		to_intel_framebuffer(intel_crtc->base.primary->fb);
+	struct drm_i915_gem_object *obj = intel_fb->obj;
+	i915_reg_t reg = DSPCNTR(intel_crtc->plane);
+	u32 dspcntr;
 
-static bool pageflip_finished(struct intel_crtc *crtc,
-			      struct intel_flip_work *work)
+	dspcntr = I915_READ(reg);
+
+	if (obj->tiling_mode != I915_TILING_NONE)
+		dspcntr |= DISPPLANE_TILED;
+	else
+		dspcntr &= ~DISPPLANE_TILED;
+
+	I915_WRITE(reg, dspcntr);
+
+	I915_WRITE(DSPSURF(intel_crtc->plane), work->gtt_offset);
+	POSTING_READ(DSPSURF(intel_crtc->plane));
+}
+
+static void intel_mmio_flip_work_func(struct work_struct *w)
 {
+	struct intel_flip_work *work =
+		container_of(w, struct intel_flip_work, mmio_work);
+	struct intel_crtc *crtc = to_intel_crtc(work->crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct intel_framebuffer *intel_fb =
+		to_intel_framebuffer(crtc->base.primary->fb);
+	struct drm_i915_gem_object *obj = intel_fb->obj;
+
+	if (work->flip_queued_req)
+		WARN_ON(__i915_wait_request(work->flip_queued_req,
+					    false, NULL,
+					    &dev_priv->rps.mmioflips));
+
+	/* For framebuffer backed by dmabuf, wait for fence */
+	if (obj->base.dma_buf)
+		WARN_ON(reservation_object_wait_timeout_rcu(obj->base.dma_buf->resv,
+							    false, false,
+							    MAX_SCHEDULE_TIMEOUT) < 0);
+
+	intel_pipe_update_start(crtc);
+
+	if (INTEL_GEN(dev_priv) >= 9)
+		skl_do_mmio_flip(crtc, work->rotation, work);
+	else
+		/* use_mmio_flip() restricts MMIO flips to ilk+ */
+		ilk_do_mmio_flip(crtc, work);
+
+	intel_pipe_update_end(crtc, work);
+}
+
+static int intel_default_queue_flip(struct drm_device *dev,
+				    struct drm_crtc *crtc,
+				    struct drm_framebuffer *fb,
+				    struct drm_i915_gem_object *obj,
+				    struct drm_i915_gem_request *req,
+				    uint32_t flags)
+{
+	return -ENODEV;
+}
+
+static bool __pageflip_stall_check_cs(struct drm_i915_private *dev_priv,
+				      struct intel_crtc *intel_crtc,
+				      struct intel_flip_work *work)
+{
+	u32 addr, vblank;
+
 	if (!atomic_read(&work->pending))
 		return false;
 
 	smp_rmb();
 
-	/*
-	 * MMIO work completes when vblank is different from
-	 * flip_queued_vblank.
+	vblank = intel_crtc_get_vblank_counter(intel_crtc);
+	if (work->flip_ready_vblank == 0) {
+		if (work->flip_queued_req &&
+		    !i915_gem_request_completed(work->flip_queued_req, true))
+			return false;
+
+		work->flip_ready_vblank = vblank;
+	}
+
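+	/* Give the flip a few vblanks after it became ready before
+	 * treating it as potentially stuck.
+	 */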
+	if (vblank - work->flip_ready_vblank < 3)
+		return false;
+
+	/* Potential stall - if we see that the flip has happened,
+	 * assume a missed interrupt. */
+	if (INTEL_GEN(dev_priv) >= 4)
+		addr = I915_HI_DISPBASE(I915_READ(DSPSURF(intel_crtc->plane)));
+	else
+		addr = I915_READ(DSPADDR(intel_crtc->plane));
+
+	/* There is a potential issue here with a false positive after a flip
+	 * to the same address. We could address this by checking for a
+	 * non-incrementing frame counter.
 	 */
-	return intel_crtc_get_vblank_counter(crtc) != work->flip_queued_vblank;
+	return addr == work->gtt_offset;
 }
 
-void intel_finish_page_flip_mmio(struct drm_i915_private *dev_priv, int pipe)
+void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe)
 {
 	struct drm_device *dev = dev_priv->dev;
 	struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_flip_work *work;
-	unsigned long flags;
 
-	/* Ignore early vblank irqs */
-	if (!crtc)
-		return;
+	WARN_ON(!in_interrupt());
 
-	/*
-	 * This is called both by irq handlers and the reset code (to complete
-	 * lost pageflips) so needs the full irqsave spinlocks.
-	 */
-	spin_lock_irqsave(&dev->event_lock, flags);
-	while (!list_empty(&intel_crtc->flip_work)) {
-		work = list_first_entry(&intel_crtc->flip_work,
-					struct intel_flip_work,
-					head);
+	if (crtc == NULL)
+		return;
 
-		if (!pageflip_finished(intel_crtc, work) ||
-		    work_busy(&work->unpin_work))
-			break;
+	spin_lock(&dev->event_lock);
+	work = intel_crtc->flip_work;
 
-		page_flip_completed(intel_crtc, work);
+	if (work != NULL && !is_mmio_work(work) &&
+	    __pageflip_stall_check_cs(dev_priv, intel_crtc, work)) {
+		WARN_ONCE(1,
+			  "Kicking stuck page flip: queued at %d, now %d\n",
+			work->flip_queued_vblank, intel_crtc_get_vblank_counter(intel_crtc));
+		page_flip_completed(intel_crtc);
+		work = NULL;
 	}
-	spin_unlock_irqrestore(&dev->event_lock, flags);
+
+	if (work != NULL && !is_mmio_work(work) &&
+	    intel_crtc_get_vblank_counter(intel_crtc) - work->flip_queued_vblank > 1)
+		intel_queue_rps_boost_for_request(work->flip_queued_req);
+	spin_unlock(&dev->event_lock);
 }
 
-static void intel_mmio_flip_work_func(struct work_struct *w)
+static int intel_crtc_page_flip(struct drm_crtc *crtc,
+				struct drm_framebuffer *fb,
+				struct drm_pending_vblank_event *event,
+				uint32_t page_flip_flags)
 {
-	struct intel_flip_work *work =
-		container_of(w, struct intel_flip_work, mmio_work);
-	struct drm_crtc *crtc = work->old_crtc_state->base.crtc;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct intel_crtc_state *crtc_state = work->new_crtc_state;
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_request *req;
-	int i, ret;
+	struct drm_framebuffer *old_fb = crtc->primary->fb;
+	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct drm_plane *primary = crtc->primary;
+	enum pipe pipe = intel_crtc->pipe;
+	struct intel_flip_work *work;
+	struct intel_engine_cs *engine;
+	bool mmio_flip;
+	struct drm_i915_gem_request *request = NULL;
+	int ret;
+
+	/*
+	 * drm_mode_page_flip_ioctl() should already catch this, but double
+	 * check to be safe.  In the future we may enable pageflipping from
+	 * a disabled primary plane.
+	 */
+	if (WARN_ON(intel_fb_obj(old_fb) == NULL))
+		return -EBUSY;
+
+	/* Can't change pixel format via MI display flips. */
+	if (fb->pixel_format != crtc->primary->fb->pixel_format)
+		return -EINVAL;
+
+	/*
+	 * TILEOFF/LINOFF registers can't be changed via MI display flips.
+	 * Note that pitch changes could also affect these registers.
+	 */
+	if (INTEL_INFO(dev)->gen > 3 &&
+	    (fb->offsets[0] != crtc->primary->fb->offsets[0] ||
+	     fb->pitches[0] != crtc->primary->fb->pitches[0]))
+		return -EINVAL;
+
+	if (i915_terminally_wedged(&dev_priv->gpu_error))
+		goto out_hang;
 
-	if (!needs_modeset(&crtc_state->base) && crtc_state->update_pipe) {
-		work->put_power_domains =
-			modeset_get_crtc_power_domains(crtc, crtc_state);
+	work = kzalloc(sizeof(*work), GFP_KERNEL);
+	if (work == NULL)
+		return -ENOMEM;
+
+	work->event = event;
+	work->crtc = crtc;
+	work->old_fb = old_fb;
+	INIT_WORK(&work->unpin_work, intel_unpin_work_fn);
+
+	ret = drm_crtc_vblank_get(crtc);
+	if (ret)
+		goto free_work;
+
+	/* We borrow the event spin lock for protecting flip_work */
+	spin_lock_irq(&dev->event_lock);
+	if (intel_crtc->flip_work) {
+		/* Before declaring the flip queue wedged, check if
+		 * the hardware completed the operation behind our backs.
+		 */
+		if (pageflip_finished(intel_crtc, intel_crtc->flip_work)) {
+			DRM_DEBUG_DRIVER("flip queue: previous flip completed, continuing\n");
+			page_flip_completed(intel_crtc);
+		} else {
+			DRM_DEBUG_DRIVER("flip queue: crtc already busy\n");
+			spin_unlock_irq(&dev->event_lock);
+
+			drm_crtc_vblank_put(crtc);
+			kfree(work);
+			return -EBUSY;
+		}
 	}
+	intel_crtc->flip_work = work;
+	spin_unlock_irq(&dev->event_lock);
 
-	for (i = 0; i < work->num_planes; i++) {
-		struct intel_plane_state *old_plane_state = work->old_plane_state[i];
+	if (atomic_read(&intel_crtc->unpin_work_count) >= 2)
+		flush_workqueue(dev_priv->wq);
 
-		/* For framebuffer backed by dmabuf, wait for fence */
-		if (old_plane_state->base.fence)
-			WARN_ON(fence_wait(old_plane_state->base.fence, false) < 0);
+	/* Reference the objects for the scheduled work. */
+	drm_framebuffer_reference(work->old_fb);
+	drm_gem_object_reference(&obj->base);
 
-		req = old_plane_state->wait_req;
-		if (!req)
-			continue;
+	crtc->primary->fb = fb;
+	update_state_fb(crtc->primary);
+	intel_fbc_pre_update(intel_crtc);
 
-		WARN_ON(__i915_wait_request(req, false, NULL,
-					    &dev_priv->rps.mmioflips));
+	work->pending_flip_obj = obj;
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		goto cleanup;
+
+	intel_crtc->reset_counter = i915_reset_counter(&dev_priv->gpu_error);
+	if (__i915_reset_in_progress_or_wedged(intel_crtc->reset_counter)) {
+		ret = -EIO;
+		goto cleanup;
 	}
 
-	ret = drm_crtc_vblank_get(crtc);
-	I915_STATE_WARN(ret < 0, "enabling vblank failed with %i\n", ret);
+	atomic_inc(&intel_crtc->unpin_work_count);
+
+	if (INTEL_INFO(dev)->gen >= 5 || IS_G4X(dev))
+		work->flip_count = I915_READ(PIPE_FLIPCOUNT_G4X(pipe)) + 1;
 
-	if (work->num_planes &&
-	    work->old_plane_state[0]->base.plane == crtc->primary)
-		intel_fbc_enable(intel_crtc, work->new_crtc_state, work->new_plane_state[0]);
+	if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
+		engine = &dev_priv->engine[BCS];
+		if (obj->tiling_mode != intel_fb_obj(work->old_fb)->tiling_mode)
+			/* vlv: DISPLAY_FLIP fails to change tiling */
+			engine = NULL;
+	} else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
+		engine = &dev_priv->engine[BCS];
+	} else if (INTEL_INFO(dev)->gen >= 7) {
+		engine = i915_gem_request_get_engine(obj->last_write_req);
+		if (engine == NULL || engine->id != RCS)
+			engine = &dev_priv->engine[BCS];
+	} else {
+		engine = &dev_priv->engine[RCS];
+	}
 
-	intel_frontbuffer_flip_prepare(dev, work->fb_bits);
+	mmio_flip = use_mmio_flip(engine, obj);
 
-	intel_pipe_update_start(intel_crtc);
-	if (!needs_modeset(&crtc_state->base)) {
-		if (crtc_state->base.color_mgmt_changed || crtc_state->update_pipe) {
-			intel_color_set_csc(&crtc_state->base);
-			intel_color_load_luts(&crtc_state->base);
+	/* When using CS flips, we want to emit semaphores between rings.
+	 * However, when using mmio flips we will create a task to do the
+	 * synchronisation, so all we want here is to pin the framebuffer
+	 * into the display plane and skip any waits.
+	 */
+	if (!mmio_flip) {
+		ret = i915_gem_object_sync(obj, engine, &request);
+		if (!ret && !request) {
+			request = i915_gem_request_alloc(engine, NULL);
+			ret = PTR_ERR_OR_ZERO(request);
 		}
 
-		if (crtc_state->update_pipe)
-			intel_update_pipe_config(intel_crtc, work->old_crtc_state);
-		else if (INTEL_INFO(dev)->gen >= 9)
-			skl_detach_scalers(intel_crtc);
+		if (ret)
+			goto cleanup_pending;
 	}
 
-	for (i = 0; i < work->num_planes; i++) {
-		struct intel_plane_state *new_plane_state = work->new_plane_state[i];
-		struct intel_plane *plane = to_intel_plane(new_plane_state->base.plane);
+	ret = intel_pin_and_fence_fb_obj(fb, primary->state->rotation);
+	if (ret)
+		goto cleanup_pending;
+
+	work->gtt_offset = intel_plane_obj_offset(to_intel_plane(primary),
+						  obj, 0);
+	work->gtt_offset += intel_crtc->dspaddr_offset;
+	work->rotation = crtc->primary->state->rotation;
 
-		if (new_plane_state->visible)
-			plane->update_plane(&plane->base, crtc_state, new_plane_state);
-		else
-			plane->disable_plane(&plane->base, crtc);
+	if (mmio_flip) {
+		INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func);
+
+		i915_gem_request_assign(&work->flip_queued_req,
+					obj->last_write_req);
+
+		schedule_work(&work->mmio_work);
+	} else {
+		i915_gem_request_assign(&work->flip_queued_req, request);
+		ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, request,
+						   page_flip_flags);
+		if (ret)
+			goto cleanup_unpin;
+
+		intel_mark_page_flip_active(intel_crtc, work);
+
+		i915_add_request_no_flush(request);
 	}
 
-	intel_pipe_update_end(intel_crtc, work);
+	i915_gem_track_fb(intel_fb_obj(old_fb), obj,
+			  to_intel_plane(primary)->frontbuffer_bit);
+	mutex_unlock(&dev->struct_mutex);
+
+	intel_frontbuffer_flip_prepare(dev,
+				       to_intel_plane(primary)->frontbuffer_bit);
+
+	trace_i915_flip_request(intel_crtc->plane, obj);
+
+	return 0;
+
+cleanup_unpin:
+	intel_unpin_fb_obj(fb, crtc->primary->state->rotation);
+cleanup_pending:
+	if (!IS_ERR_OR_NULL(request))
+		i915_add_request_no_flush(request);
+	atomic_dec(&intel_crtc->unpin_work_count);
+	mutex_unlock(&dev->struct_mutex);
+cleanup:
+	crtc->primary->fb = old_fb;
+	update_state_fb(crtc->primary);
+
+	drm_gem_object_unreference_unlocked(&obj->base);
+	drm_framebuffer_unreference(work->old_fb);
+
+	spin_lock_irq(&dev->event_lock);
+	intel_crtc->flip_work = NULL;
+	spin_unlock_irq(&dev->event_lock);
+
+	drm_crtc_vblank_put(crtc);
+free_work:
+	kfree(work);
+
+	if (ret == -EIO) {
+		struct drm_atomic_state *state;
+		struct drm_plane_state *plane_state;
+
+out_hang:
+		state = drm_atomic_state_alloc(dev);
+		if (!state)
+			return -ENOMEM;
+		state->acquire_ctx = drm_modeset_legacy_acquire_ctx(crtc);
+
+retry:
+		plane_state = drm_atomic_get_plane_state(state, primary);
+		ret = PTR_ERR_OR_ZERO(plane_state);
+		if (!ret) {
+			drm_atomic_set_fb_for_plane(plane_state, fb);
+
+			ret = drm_atomic_set_crtc_for_plane(plane_state, crtc);
+			if (!ret)
+				ret = drm_atomic_commit(state);
+		}
+
+		if (ret == -EDEADLK) {
+			drm_modeset_backoff(state->acquire_ctx);
+			drm_atomic_state_clear(state);
+			goto retry;
+		}
+
+		if (ret)
+			drm_atomic_state_free(state);
+
+		if (ret == 0 && event) {
+			spin_lock_irq(&dev->event_lock);
+			drm_crtc_send_vblank_event(crtc, event);
+			spin_unlock_irq(&dev->event_lock);
+		}
+	}
+	return ret;
 }
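
Reviewer note: the engine chosen above only matters for the CS path; whether a CS flip is attempted at all is decided by use_mmio_flip(). The condensed sketch below is an assumption based on the parameter's documented -1=never/0=auto/1=always semantics and on the cross-engine-sync reasoning in the comment above the !mmio_flip branch; it is not quoted from this patch.

static bool use_mmio_flip(struct intel_engine_cs *engine,
			  struct drm_i915_gem_object *obj)
{
	/* No engine to flip on means MMIO is the only option. */
	if (engine == NULL)
		return true;

	/* Pre-gen5 has no flip-done interrupt suitable for MMIO flips. */
	if (INTEL_INFO(engine->i915)->gen < 5)
		return false;

	if (i915.use_mmio_flip < 0)		/* -1: never */
		return false;
	else if (i915.use_mmio_flip > 0)	/*  1: always */
		return true;
	else if (i915.enable_execlists)		/* no CS flips under execlists */
		return true;

	/* 0: auto - avoid a cross-engine sync by flipping via MMIO when the
	 * object was last written by a different engine.
	 */
	return engine != i915_gem_request_get_engine(obj->last_write_req);
}
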
 
+
 /**
  * intel_wm_need_update - Check whether watermarks need updating
  * @plane: drm plane
@@ -11416,6 +12090,8 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc,
 
 static const struct drm_crtc_helper_funcs intel_helper_funcs = {
 	.mode_set_base_atomic = intel_pipe_set_base_atomic,
+	.atomic_begin = intel_begin_crtc_commit,
+	.atomic_flush = intel_finish_crtc_commit,
 	.atomic_check = intel_crtc_atomic_check,
 };
 
@@ -12255,8 +12931,7 @@ verify_connector_state(struct drm_device *dev, struct drm_crtc *crtc)
 		if (state->crtc != crtc)
 			continue;
 
-		intel_connector_verify_state(to_intel_connector(connector),
-					     connector->state);
+		intel_connector_verify_state(to_intel_connector(connector));
 
 		I915_STATE_WARN(state->best_encoder != encoder,
 		     "connector's atomic encoder doesn't match legacy encoder\n");
@@ -12458,7 +13133,12 @@ intel_modeset_verify_crtc(struct drm_crtc *crtc,
 			 struct drm_crtc_state *old_state,
 			 struct drm_crtc_state *new_state)
 {
+	if (!needs_modeset(new_state) &&
+	    !to_intel_crtc_state(new_state)->update_pipe)
+		return;
+
 	verify_wm_state(crtc, new_state);
+	verify_connector_state(crtc->dev, crtc);
 	verify_crtc_state(crtc, old_state, new_state);
 	verify_shared_dpll_state(crtc->dev, crtc, old_state, new_state);
 }
@@ -12810,83 +13490,32 @@ static int intel_atomic_check(struct drm_device *dev,
 	return calc_watermark_data(state);
 }
 
-static bool needs_work(struct drm_crtc_state *crtc_state)
-{
-	/* hw state checker needs to run */
-	if (needs_modeset(crtc_state))
-		return true;
-
-	/* unpin old fb's, possibly vblank update */
-	if (crtc_state->planes_changed)
-		return true;
-
-	/* pipe parameters need to be updated, and hw state checker */
-	if (to_intel_crtc_state(crtc_state)->update_pipe)
-		return true;
-
-	/* vblank event requested? */
-	if (crtc_state->event)
-		return true;
-
-	return false;
-}
-
 static int intel_atomic_prepare_commit(struct drm_device *dev,
 				       struct drm_atomic_state *state,
 				       bool nonblock)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
 	struct drm_plane_state *plane_state;
 	struct drm_crtc_state *crtc_state;
 	struct drm_plane *plane;
 	struct drm_crtc *crtc;
 	int i, ret;
 
-	for_each_crtc_in_state(state, crtc, crtc_state, i) {
-		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-		struct intel_flip_work *work;
-
-		if (!state->legacy_cursor_update) {
-			ret = intel_crtc_wait_for_pending_flips(crtc);
-			if (ret)
-				return ret;
-
-			if (atomic_read(&intel_crtc->unpin_work_count) >= 2)
-				flush_workqueue(dev_priv->wq);
-		}
+	if (nonblock) {
+		DRM_DEBUG_KMS("i915 does not yet support nonblocking commit\n");
+		return -EINVAL;
+	}
 
-		/* test if we need to update something */
-		if (!needs_work(crtc_state))
+	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		if (state->legacy_cursor_update)
 			continue;
 
-		intel_state->work[i] = work =
-			kzalloc(sizeof(**intel_state->work), GFP_KERNEL);
-
-		if (!work)
-			return -ENOMEM;
-
-		if (needs_modeset(crtc_state) ||
-		    to_intel_crtc_state(crtc_state)->update_pipe) {
-			work->num_old_connectors = hweight32(crtc->state->connector_mask);
-
-			work->old_connector_state = kcalloc(work->num_old_connectors,
-							    sizeof(*work->old_connector_state),
-							    GFP_KERNEL);
-
-			work->num_new_connectors = hweight32(crtc_state->connector_mask);
-			work->new_connector_state = kcalloc(work->num_new_connectors,
-							    sizeof(*work->new_connector_state),
-							    GFP_KERNEL);
-
-			if (!work->old_connector_state || !work->new_connector_state)
-				return -ENOMEM;
-		}
-	}
+		ret = intel_crtc_wait_for_pending_flips(crtc);
+		if (ret)
+			return ret;
 
-	if (intel_state->modeset && nonblock) {
-		DRM_DEBUG_ATOMIC("Nonblock modesets are not yet supported!\n");
-		return -EINVAL;
+		if (atomic_read(&to_intel_crtc(crtc)->unpin_work_count) >= 2)
+			flush_workqueue(dev_priv->wq);
 	}
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
@@ -12901,15 +13530,6 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
 			struct intel_plane_state *intel_plane_state =
 				to_intel_plane_state(plane_state);
 
-			if (plane_state->fence) {
-				long lret = fence_wait(plane_state->fence, true);
-
-				if (lret < 0) {
-					ret = lret;
-					break;
-				}
-			}
-
 			if (!intel_plane_state->wait_req)
 				continue;
 
@@ -12939,157 +13559,69 @@ u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc)
 	return dev->driver->get_vblank_counter(dev, crtc->pipe);
 }
 
-static void intel_prepare_work(struct drm_crtc *crtc,
-			       struct intel_flip_work *work,
-			       struct drm_atomic_state *state,
-			       struct drm_crtc_state *old_crtc_state)
+static void intel_atomic_wait_for_vblanks(struct drm_device *dev,
+					  struct drm_i915_private *dev_priv,
+					  unsigned crtc_mask)
 {
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct drm_plane_state *old_plane_state;
-	struct drm_plane *plane;
-	int i, j = 0;
+	unsigned last_vblank_count[I915_MAX_PIPES];
+	enum pipe pipe;
+	int ret;
 
-	INIT_WORK(&work->unpin_work, intel_unpin_work_fn);
-	INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func);
-	atomic_inc(&intel_crtc->unpin_work_count);
+	if (!crtc_mask)
+		return;
 
-	for_each_plane_in_state(state, plane, old_plane_state, i) {
-		struct intel_plane_state *old_state = to_intel_plane_state(old_plane_state);
-		struct intel_plane_state *new_state = to_intel_plane_state(plane->state);
+	for_each_pipe(dev_priv, pipe) {
+		struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
 
-		if (old_state->base.crtc != crtc &&
-		    new_state->base.crtc != crtc)
+		if (!((1 << pipe) & crtc_mask))
 			continue;
 
-		if (plane->type == DRM_PLANE_TYPE_PRIMARY) {
-			plane->fb = new_state->base.fb;
-			crtc->x = new_state->base.src_x >> 16;
-			crtc->y = new_state->base.src_y >> 16;
+		ret = drm_crtc_vblank_get(crtc);
+		if (WARN_ON(ret != 0)) {
+			crtc_mask &= ~(1 << pipe);
+			continue;
 		}
 
-		old_state->wait_req = new_state->wait_req;
-		new_state->wait_req = NULL;
-
-		old_state->base.fence = new_state->base.fence;
-		new_state->base.fence = NULL;
-
-		/* remove plane state from the atomic state and move it to work */
-		old_plane_state->state = NULL;
-		state->planes[i] = NULL;
-		state->plane_states[i] = NULL;
-
-		work->old_plane_state[j] = old_state;
-		work->new_plane_state[j++] = new_state;
+		last_vblank_count[pipe] = drm_crtc_vblank_count(crtc);
 	}
 
-	old_crtc_state->state = NULL;
-	state->crtcs[drm_crtc_index(crtc)] = NULL;
-	state->crtc_states[drm_crtc_index(crtc)] = NULL;
-
-	work->old_crtc_state = to_intel_crtc_state(old_crtc_state);
-	work->new_crtc_state = to_intel_crtc_state(crtc->state);
-	work->num_planes = j;
-
-	work->event = crtc->state->event;
-	crtc->state->event = NULL;
-
-	if (needs_modeset(crtc->state) || work->new_crtc_state->update_pipe) {
-		struct drm_connector *conn;
-		struct drm_connector_state *old_conn_state;
-		int k = 0;
-
-		j = 0;
-
-		/*
-		 * intel_unpin_work_fn cannot depend on the connector list
-		 * because it may be freed from underneath it, so add
-		 * them all to the work struct while we're holding locks.
-		 */
-		for_each_connector_in_state(state, conn, old_conn_state, i) {
-			if (old_conn_state->crtc == crtc) {
-				work->old_connector_state[j++] = old_conn_state;
-
-				state->connectors[i] = NULL;
-				state->connector_states[i] = NULL;
-			}
-		}
-
-		/* If another crtc has stolen the connector from state,
-		 * then for_each_connector_in_state is no longer reliable,
-		 * so use drm_for_each_connector here.
-		 */
-		drm_for_each_connector(conn, state->dev)
-			if (conn->state->crtc == crtc)
-				work->new_connector_state[k++] = conn->state;
-
-		WARN(j != work->num_old_connectors, "j = %i, expected %i\n", j, work->num_old_connectors);
-		WARN(k != work->num_new_connectors, "k = %i, expected %i\n", k, work->num_new_connectors);
-	} else if (!work->new_crtc_state->update_wm_post)
-		work->can_async_unpin = true;
-
-	work->fb_bits = work->new_crtc_state->fb_bits;
-}
-
-static void intel_schedule_unpin(struct drm_crtc *crtc,
-				 struct intel_atomic_state *state,
-				 struct intel_flip_work *work)
-{
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	to_intel_crtc(crtc)->config = work->new_crtc_state;
-
-	queue_work(dev_priv->wq, &work->unpin_work);
-}
+	for_each_pipe(dev_priv, pipe) {
+		struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
+		long lret;
 
-static void intel_schedule_flip(struct drm_crtc *crtc,
-				struct intel_atomic_state *state,
-				struct intel_flip_work *work,
-				bool nonblock)
-{
-	struct intel_crtc_state *crtc_state = work->new_crtc_state;
+		if (!((1 << pipe) & crtc_mask))
+			continue;
 
-	if (crtc_state->base.planes_changed ||
-	    needs_modeset(&crtc_state->base) ||
-	    crtc_state->update_pipe) {
-		if (nonblock)
-			schedule_work(&work->mmio_work);
-		else
-			intel_mmio_flip_work_func(&work->mmio_work);
-	} else {
-		int ret;
+		lret = wait_event_timeout(dev->vblank[pipe].queue,
+				last_vblank_count[pipe] !=
+					drm_crtc_vblank_count(crtc),
+				msecs_to_jiffies(50));
 
-		ret = drm_crtc_vblank_get(crtc);
-		I915_STATE_WARN(ret < 0, "enabling vblank failed with %i\n", ret);
+		WARN(!lret, "pipe %c vblank wait timed out\n", pipe_name(pipe));
 
-		work->flip_queued_vblank = intel_crtc_get_vblank_counter(to_intel_crtc(crtc));
-		smp_mb__before_atomic();
-		atomic_set(&work->pending, 1);
+		drm_crtc_vblank_put(crtc);
 	}
 }
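
Reviewer note: the helper takes a pipe-indexed bitmask, and wait_event_timeout() returns the remaining jiffies on success or 0 on timeout, which is exactly what the WARN above keys on. A minimal, hypothetical caller is sketched below; only intel_atomic_wait_for_vblanks() and needs_vblank_wait() (defined next) are from this patch.

static void example_wait_for_pending_updates(struct drm_device *dev,
					     struct drm_atomic_state *state)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_crtc_state *old_crtc_state;
	struct drm_crtc *crtc;
	unsigned crtc_mask = 0;
	int i;

	/* Collect every active pipe whose update needs a vblank to settle. */
	for_each_crtc_in_state(state, crtc, old_crtc_state, i)
		if (crtc->state->active &&
		    needs_vblank_wait(to_intel_crtc_state(crtc->state)))
			crtc_mask |= 1 << to_intel_crtc(crtc)->pipe;

	intel_atomic_wait_for_vblanks(dev, dev_priv, crtc_mask);
}
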
 
-static void intel_schedule_update(struct drm_crtc *crtc,
-				  struct intel_atomic_state *state,
-				  struct intel_flip_work *work,
-				  bool nonblock)
+static bool needs_vblank_wait(struct intel_crtc_state *crtc_state)
 {
-	struct drm_device *dev = crtc->dev;
-	struct intel_crtc_state *pipe_config = work->new_crtc_state;
+	/* fb updated, need to unpin old fb */
+	if (crtc_state->fb_changed)
+		return true;
 
-	if (!pipe_config->base.active && work->can_async_unpin) {
-		INIT_LIST_HEAD(&work->head);
-		intel_schedule_unpin(crtc, state, work);
-		return;
-	}
+	/* wm changes, need vblank before final wm's */
+	if (crtc_state->update_wm_post)
+		return true;
 
-	spin_lock_irq(&dev->event_lock);
-	list_add_tail(&work->head, &to_intel_crtc(crtc)->flip_work);
-	spin_unlock_irq(&dev->event_lock);
+	/*
+	 * cxsr is re-enabled after vblank.
+	 * This is already handled by crtc_state->update_wm_post,
+	 * but added for clarity.
+	 */
+	if (crtc_state->disable_cxsr)
+		return true;
 
-	if (!pipe_config->base.active)
-		intel_schedule_unpin(crtc, state, work);
-	else
-		intel_schedule_flip(crtc, state, work, nonblock);
+	return false;
 }
 
 /**
@@ -13116,7 +13648,11 @@ static int intel_atomic_commit(struct drm_device *dev,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_crtc_state *old_crtc_state;
 	struct drm_crtc *crtc;
+	struct intel_crtc_state *intel_cstate;
 	int ret = 0, i;
+	bool hw_check = intel_state->modeset;
+	unsigned long put_domains[I915_MAX_PIPES] = {};
+	unsigned crtc_vblank_mask = 0;
 
 	ret = intel_atomic_prepare_commit(dev, state, nonblock);
 	if (ret) {
@@ -13134,20 +13670,27 @@ static int intel_atomic_commit(struct drm_device *dev,
 		       sizeof(intel_state->min_pixclk));
 		dev_priv->active_crtcs = intel_state->active_crtcs;
 		dev_priv->atomic_cdclk_freq = intel_state->cdclk;
+
+		intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET);
 	}
 
 	for_each_crtc_in_state(state, crtc, old_crtc_state, i) {
 		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
+		if (needs_modeset(crtc->state) ||
+		    to_intel_crtc_state(crtc->state)->update_pipe) {
+			hw_check = true;
+
+			put_domains[to_intel_crtc(crtc)->pipe] =
+				modeset_get_crtc_power_domains(crtc,
+					to_intel_crtc_state(crtc->state));
+		}
+
 		if (!needs_modeset(crtc->state))
 			continue;
 
 		intel_pre_plane_update(to_intel_crtc_state(old_crtc_state));
 
-		intel_state->work[i]->put_power_domains =
-			modeset_get_crtc_power_domains(crtc,
-				to_intel_crtc_state(crtc->state));
-
 		if (old_crtc_state->active) {
 			intel_crtc_disable_planes(crtc, old_crtc_state->plane_mask);
 			dev_priv->display.crtc_disable(crtc);
@@ -13184,9 +13727,11 @@ static int intel_atomic_commit(struct drm_device *dev,
 
 	/* Now enable the clocks, plane, pipe, and connectors that we set up. */
 	for_each_crtc_in_state(state, crtc, old_crtc_state, i) {
-		struct intel_flip_work *work = intel_state->work[i];
 		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 		bool modeset = needs_modeset(crtc->state);
+		struct intel_crtc_state *pipe_config =
+			to_intel_crtc_state(crtc->state);
+		bool update_pipe = !modeset && pipe_config->update_pipe;
 
 		if (modeset && crtc->state->active) {
 			update_scanline_offset(to_intel_crtc(crtc));
@@ -13196,30 +13741,53 @@ static int intel_atomic_commit(struct drm_device *dev,
 		if (!modeset)
 			intel_pre_plane_update(to_intel_crtc_state(old_crtc_state));
 
-		if (!work) {
-			if (!list_empty_careful(&intel_crtc->flip_work)) {
-				spin_lock_irq(&dev->event_lock);
-				if (!list_empty(&intel_crtc->flip_work))
-					work = list_last_entry(&intel_crtc->flip_work,
-							       struct intel_flip_work, head);
-
-				if (work && work->new_crtc_state == to_intel_crtc_state(old_crtc_state)) {
-					work->free_new_crtc_state = true;
-					state->crtc_states[i] = NULL;
-					state->crtcs[i] = NULL;
-				}
-				spin_unlock_irq(&dev->event_lock);
-			}
-			continue;
-		}
+		if (crtc->state->active &&
+		    drm_atomic_get_existing_plane_state(state, crtc->primary))
+			intel_fbc_enable(intel_crtc);
 
-		intel_state->work[i] = NULL;
-		intel_prepare_work(crtc, work, state, old_crtc_state);
-		intel_schedule_update(crtc, intel_state, work, nonblock);
+		if (crtc->state->active &&
+		    (crtc->state->planes_changed || update_pipe))
+			drm_atomic_helper_commit_planes_on_crtc(old_crtc_state);
+
+		if (pipe_config->base.active && needs_vblank_wait(pipe_config))
+			crtc_vblank_mask |= 1 << i;
 	}
 
 	/* FIXME: add subpixel order */
 
+	if (!state->legacy_cursor_update)
+		intel_atomic_wait_for_vblanks(dev, dev_priv, crtc_vblank_mask);
+
+	/*
+	 * Now that the vblank has passed, we can go ahead and program the
+	 * optimal watermarks on platforms that need two-step watermark
+	 * programming.
+	 *
+	 * TODO: Move this (and other cleanup) to an async worker eventually.
+	 */
+	for_each_crtc_in_state(state, crtc, old_crtc_state, i) {
+		intel_cstate = to_intel_crtc_state(crtc->state);
+
+		if (dev_priv->display.optimize_watermarks)
+			dev_priv->display.optimize_watermarks(intel_cstate);
+	}
+
+	for_each_crtc_in_state(state, crtc, old_crtc_state, i) {
+		intel_post_plane_update(to_intel_crtc_state(old_crtc_state));
+
+		if (put_domains[i])
+			modeset_put_power_domains(dev_priv, put_domains[i]);
+
+		intel_modeset_verify_crtc(crtc, old_crtc_state, crtc->state);
+	}
+
+	if (intel_state->modeset)
+		intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET);
+
+	mutex_lock(&dev->struct_mutex);
+	drm_atomic_helper_cleanup_planes(dev, state);
+	mutex_unlock(&dev->struct_mutex);
+
 	drm_atomic_state_free(state);
 
 	/* As one of the primary mmio accessors, KMS has a high likelihood
@@ -13283,38 +13851,11 @@ static const struct drm_crtc_funcs intel_crtc_funcs = {
 	.set_config = drm_atomic_helper_set_config,
 	.set_property = drm_atomic_helper_crtc_set_property,
 	.destroy = intel_crtc_destroy,
-	.page_flip = drm_atomic_helper_page_flip,
+	.page_flip = intel_crtc_page_flip,
 	.atomic_duplicate_state = intel_crtc_duplicate_state,
 	.atomic_destroy_state = intel_crtc_destroy_state,
 };
 
-static struct fence *intel_get_excl_fence(struct drm_i915_gem_object *obj)
-{
-	struct reservation_object *resv;
-
-
-	if (!obj->base.dma_buf)
-		return NULL;
-
-	resv = obj->base.dma_buf->resv;
-
-	/* For framebuffer backed by dmabuf, wait for fence */
-	while (1) {
-		struct fence *fence_excl, *ret = NULL;
-
-		rcu_read_lock();
-
-		fence_excl = rcu_dereference(resv->fence_excl);
-		if (fence_excl)
-			ret = fence_get_rcu(fence_excl);
-
-		rcu_read_unlock();
-
-		if (ret == fence_excl)
-			return ret;
-	}
-}
-
 /**
  * intel_prepare_plane_fb - Prepare fb for usage on plane
  * @plane: drm plane to prepare for
@@ -13338,20 +13879,11 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 	struct intel_plane *intel_plane = to_intel_plane(plane);
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb);
-	struct drm_crtc *crtc = new_state->crtc ?: plane->state->crtc;
 	int ret = 0;
 
 	if (!obj && !old_obj)
 		return 0;
 
-	if (WARN_ON(!new_state->state) || WARN_ON(!crtc) ||
-	    WARN_ON(!to_intel_atomic_state(new_state->state)->work[to_intel_crtc(crtc)->pipe])) {
-		if (WARN_ON(old_obj != obj))
-			return -EINVAL;
-
-		return 0;
-	}
-
 	if (old_obj) {
 		struct drm_crtc_state *crtc_state =
 			drm_atomic_get_existing_crtc_state(new_state->state, plane->state->crtc);
@@ -13376,6 +13908,19 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 		}
 	}
 
+	/* For framebuffer backed by dmabuf, wait for fence */
+	if (obj && obj->base.dma_buf) {
+		long lret;
+
+		lret = reservation_object_wait_timeout_rcu(obj->base.dma_buf->resv,
+							   false, true,
+							   MAX_SCHEDULE_TIMEOUT);
+		if (lret == -ERESTARTSYS)
+			return lret;
+
+		WARN(lret < 0, "dma-buf fence wait failed: %li\n", lret);
+	}
+
 	if (!obj) {
 		ret = 0;
 	} else if (plane->type == DRM_PLANE_TYPE_CURSOR &&
@@ -13395,8 +13940,6 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 
 			i915_gem_request_assign(&plane_state->wait_req,
 						obj->last_write_req);
-
-			plane_state->base.fence = intel_get_excl_fence(obj);
 		}
 
 		i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
@@ -13439,9 +13982,6 @@ intel_cleanup_plane_fb(struct drm_plane *plane,
 		i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
 
 	i915_gem_request_assign(&old_intel_state->wait_req, NULL);
-
-	fence_put(old_intel_state->base.fence);
-	old_intel_state->base.fence = NULL;
 }
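
Reviewer note: the dma-buf wait added to intel_prepare_plane_fb() above replaces the removed intel_get_excl_fence()/fence_wait() pairing with a single reservation_object_wait_timeout_rcu() call, which returns a negative error (e.g. -ERESTARTSYS), 0 on timeout, or the remaining timeout on success. A standalone sketch of that calling convention follows; the wrapper name is hypothetical.

/* Wait interruptibly, with no deadline, for the exclusive fence on a
 * dma-buf backing a framebuffer.
 */
static long example_wait_dmabuf(struct dma_buf *dma_buf)
{
	long lret;

	lret = reservation_object_wait_timeout_rcu(dma_buf->resv,
						   false, /* wait_all: exclusive fence only */
						   true,  /* intr: allow signals */
						   MAX_SCHEDULE_TIMEOUT);
	if (lret < 0)
		return lret; /* interrupted by a signal, or other error */

	/* With an unbounded timeout, the only other outcome is success. */
	return 0;
}
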
 
 int
@@ -13501,6 +14041,40 @@ intel_check_primary_plane(struct drm_plane *plane,
 					     &state->visible);
 }
 
+static void intel_begin_crtc_commit(struct drm_crtc *crtc,
+				    struct drm_crtc_state *old_crtc_state)
+{
+	struct drm_device *dev = crtc->dev;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_crtc_state *old_intel_state =
+		to_intel_crtc_state(old_crtc_state);
+	bool modeset = needs_modeset(crtc->state);
+
+	/* Perform vblank evasion around commit operation */
+	intel_pipe_update_start(intel_crtc);
+
+	if (modeset)
+		return;
+
+	if (crtc->state->color_mgmt_changed || to_intel_crtc_state(crtc->state)->update_pipe) {
+		intel_color_set_csc(crtc->state);
+		intel_color_load_luts(crtc->state);
+	}
+
+	if (to_intel_crtc_state(crtc->state)->update_pipe)
+		intel_update_pipe_config(intel_crtc, old_intel_state);
+	else if (INTEL_INFO(dev)->gen >= 9)
+		skl_detach_scalers(intel_crtc);
+}
+
+static void intel_finish_crtc_commit(struct drm_crtc *crtc,
+				     struct drm_crtc_state *old_crtc_state)
+{
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+
+	intel_pipe_update_end(intel_crtc, NULL);
+}
+
 /**
  * intel_plane_destroy - destroy a plane
  * @plane: plane to destroy
@@ -13811,8 +14385,6 @@ static void intel_crtc_init(struct drm_device *dev, int pipe)
 	intel_crtc->base.state = &crtc_state->base;
 	crtc_state->base.crtc = &intel_crtc->base;
 
-	INIT_LIST_HEAD(&intel_crtc->flip_work);
-
 	/* initialize shared scalers */
 	if (INTEL_INFO(dev)->gen >= 9) {
 		if (pipe == PIPE_C)
@@ -14568,6 +15140,34 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv)
 		dev_priv->display.modeset_calc_cdclk =
 			skl_modeset_calc_cdclk;
 	}
+
+	switch (INTEL_INFO(dev_priv)->gen) {
+	case 2:
+		dev_priv->display.queue_flip = intel_gen2_queue_flip;
+		break;
+
+	case 3:
+		dev_priv->display.queue_flip = intel_gen3_queue_flip;
+		break;
+
+	case 4:
+	case 5:
+		dev_priv->display.queue_flip = intel_gen4_queue_flip;
+		break;
+
+	case 6:
+		dev_priv->display.queue_flip = intel_gen6_queue_flip;
+		break;
+	case 7:
+	case 8: /* FIXME(BDW): Check that the gen8 RCS flip works. */
+		dev_priv->display.queue_flip = intel_gen7_queue_flip;
+		break;
+	case 9:
+		/* Fall through - unsupported since gen9 is execlist-only. */
+	default:
+		/* Default just returns -ENODEV to indicate unsupported */
+		dev_priv->display.queue_flip = intel_default_queue_flip;
+	}
 }
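
Reviewer note: per its comment, the default hook simply reports -ENODEV. A sketch consistent with that, with the parameter list inferred from the dev_priv->display.queue_flip() call site in intel_crtc_page_flip() above:

static int intel_default_queue_flip(struct drm_device *dev,
				    struct drm_crtc *crtc,
				    struct drm_framebuffer *fb,
				    struct drm_i915_gem_object *obj,
				    struct drm_i915_gem_request *req,
				    uint32_t flags)
{
	/* CS flips are unsupported here; the error is returned to the
	 * page-flip ioctl caller.
	 */
	return -ENODEV;
}
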
 
 /*
@@ -15526,9 +16126,9 @@ void intel_modeset_gem_init(struct drm_device *dev)
 			DRM_ERROR("failed to pin boot fb on pipe %d\n",
 				  to_intel_crtc(c)->pipe);
 			drm_framebuffer_unreference(c->primary->fb);
-			drm_framebuffer_unreference(c->primary->state->fb);
-			c->primary->fb = c->primary->state->fb = NULL;
+			c->primary->fb = NULL;
 			c->primary->crtc = c->primary->state->crtc = NULL;
+			update_state_fb(c->primary);
 			c->state->plane_mask &= ~(1 << drm_plane_index(c->primary));
 		}
 	}
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 03d4b1ade2d1..9b5f6634c558 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -304,8 +304,6 @@ struct intel_atomic_state {
 	unsigned int active_crtcs;
 	unsigned int min_pixclk[I915_MAX_PIPES];
 
-	struct intel_flip_work *work[I915_MAX_PIPES];
-
 	/* SKL/KBL Only */
 	unsigned int cdclk_pll_vco;
 
@@ -646,7 +644,7 @@ struct intel_crtc {
 	unsigned long enabled_power_domains;
 	bool lowfreq_avail;
 	struct intel_overlay *overlay;
-	struct list_head flip_work;
+	struct intel_flip_work *flip_work;
 
 	atomic_t unpin_work_count;
 
@@ -664,6 +662,9 @@ struct intel_crtc {
 
 	struct intel_crtc_state *config;
 
+	/* reset counter value when the last flip was submitted */
+	unsigned int reset_counter;
+
 	/* Access to these should be protected by dev_priv->irq_lock. */
 	bool cpu_fifo_underrun_disabled;
 	bool pch_fifo_underrun_disabled;
@@ -972,28 +973,20 @@ intel_get_crtc_for_plane(struct drm_device *dev, int plane)
 }
 
 struct intel_flip_work {
-	struct list_head head;
-
 	struct work_struct unpin_work;
 	struct work_struct mmio_work;
 
+	struct drm_crtc *crtc;
+	struct drm_framebuffer *old_fb;
+	struct drm_i915_gem_object *pending_flip_obj;
 	struct drm_pending_vblank_event *event;
 	atomic_t pending;
+	u32 flip_count;
+	u32 gtt_offset;
+	struct drm_i915_gem_request *flip_queued_req;
 	u32 flip_queued_vblank;
-
-	unsigned put_power_domains;
-	unsigned num_planes;
-
-	bool can_async_unpin, free_new_crtc_state;
-	unsigned fb_bits;
-
-	unsigned num_old_connectors, num_new_connectors;
-	struct drm_connector_state **old_connector_state;
-	struct drm_connector_state **new_connector_state;
-
-	struct intel_crtc_state *old_crtc_state, *new_crtc_state;
-	struct intel_plane_state *old_plane_state[I915_MAX_PLANES + 1];
-	struct intel_plane_state *new_plane_state[I915_MAX_PLANES + 1];
+	u32 flip_ready_vblank;
+	unsigned int rotation;
 };
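
Reviewer note: pending and flip_queued_vblank form the handshake with the flip-done/vblank interrupt. The arming sequence, which this patch moves out of the removed intel_schedule_flip() and into intel_mark_page_flip_active(), is sketched below based on those removed lines.

static void intel_mark_page_flip_active(struct intel_crtc *crtc,
					struct intel_flip_work *work)
{
	work->flip_queued_vblank = intel_crtc_get_vblank_counter(crtc);

	/* Publish the vblank count before marking the work pending, so the
	 * interrupt handler never observes pending == 1 with a stale count.
	 */
	smp_mb__before_atomic();
	atomic_set(&work->pending, 1);
}
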
 
 struct intel_load_detect_pipe {
@@ -1153,7 +1146,6 @@ unsigned int intel_rotation_info_size(const struct intel_rotation_info *rot_info
 bool intel_has_pending_fb_unpin(struct drm_device *dev);
 void intel_mark_busy(struct drm_i915_private *dev_priv);
 void intel_mark_idle(struct drm_i915_private *dev_priv);
-void intel_free_flip_work(struct intel_flip_work *work);
 void intel_crtc_restore_mode(struct drm_crtc *crtc);
 int intel_display_suspend(struct drm_device *dev);
 void intel_encoder_destroy(struct drm_encoder *encoder);
@@ -1206,8 +1198,9 @@ struct drm_framebuffer *
 __intel_framebuffer_create(struct drm_device *dev,
 			   struct drm_mode_fb_cmd2 *mode_cmd,
 			   struct drm_i915_gem_object *obj);
+void intel_finish_page_flip_cs(struct drm_i915_private *dev_priv, int pipe);
 void intel_finish_page_flip_mmio(struct drm_i915_private *dev_priv, int pipe);
-
+void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe);
 int intel_prepare_plane_fb(struct drm_plane *plane,
 			   const struct drm_plane_state *new_state);
 void intel_cleanup_plane_fb(struct drm_plane *plane,
@@ -1430,15 +1423,11 @@ static inline void intel_fbdev_restore_mode(struct drm_device *dev)
 void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv,
 			   struct drm_atomic_state *state);
 bool intel_fbc_is_active(struct drm_i915_private *dev_priv);
-void intel_fbc_pre_update(struct intel_crtc *crtc,
-			  struct intel_crtc_state *crtc_state,
-			  struct intel_plane_state *plane_state);
+void intel_fbc_pre_update(struct intel_crtc *crtc);
 void intel_fbc_post_update(struct intel_crtc *crtc);
 void intel_fbc_init(struct drm_i915_private *dev_priv);
 void intel_fbc_init_pipe_state(struct drm_i915_private *dev_priv);
-void intel_fbc_enable(struct intel_crtc *crtc,
-		      struct intel_crtc_state *crtc_state,
-		      struct intel_plane_state *plane_state);
+void intel_fbc_enable(struct intel_crtc *crtc);
 void intel_fbc_disable(struct intel_crtc *crtc);
 void intel_fbc_global_disable(struct drm_i915_private *dev_priv);
 void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index d2b0269b2fe4..0dea5fbcd8aa 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -480,10 +480,10 @@ static void intel_fbc_deactivate(struct drm_i915_private *dev_priv)
 		intel_fbc_hw_deactivate(dev_priv);
 }
 
-static bool multiple_pipes_ok(struct intel_crtc *crtc,
-			      struct intel_plane_state *plane_state)
+static bool multiple_pipes_ok(struct intel_crtc *crtc)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+	struct drm_plane *primary = crtc->base.primary;
 	struct intel_fbc *fbc = &dev_priv->fbc;
 	enum pipe pipe = crtc->pipe;
 
@@ -491,7 +491,9 @@ static bool multiple_pipes_ok(struct intel_crtc *crtc,
 	if (!no_fbc_on_multiple_pipes(dev_priv))
 		return true;
 
-	if (plane_state->visible)
+	WARN_ON(!drm_modeset_is_locked(&primary->mutex));
+
+	if (to_intel_plane_state(primary->state)->visible)
 		fbc->visible_pipes_mask |= (1 << pipe);
 	else
 		fbc->visible_pipes_mask &= ~(1 << pipe);
@@ -706,16 +708,21 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc)
 	return effective_w <= max_w && effective_h <= max_h;
 }
 
-static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
-					 struct intel_crtc_state *crtc_state,
-					 struct intel_plane_state *plane_state)
+static void intel_fbc_update_state_cache(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	struct intel_fbc *fbc = &dev_priv->fbc;
 	struct intel_fbc_state_cache *cache = &fbc->state_cache;
+	struct intel_crtc_state *crtc_state =
+		to_intel_crtc_state(crtc->base.state);
+	struct intel_plane_state *plane_state =
+		to_intel_plane_state(crtc->base.primary->state);
 	struct drm_framebuffer *fb = plane_state->base.fb;
 	struct drm_i915_gem_object *obj;
 
+	WARN_ON(!drm_modeset_is_locked(&crtc->base.mutex));
+	WARN_ON(!drm_modeset_is_locked(&crtc->base.primary->mutex));
+
 	cache->crtc.mode_flags = crtc_state->base.adjusted_mode.flags;
 	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
 		cache->crtc.hsw_bdw_pixel_rate =
@@ -880,9 +887,7 @@ static bool intel_fbc_reg_params_equal(struct intel_fbc_reg_params *params1,
 	return memcmp(params1, params2, sizeof(*params1)) == 0;
 }
 
-void intel_fbc_pre_update(struct intel_crtc *crtc,
-			  struct intel_crtc_state *crtc_state,
-			  struct intel_plane_state *plane_state)
+void intel_fbc_pre_update(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	struct intel_fbc *fbc = &dev_priv->fbc;
@@ -892,7 +897,7 @@ void intel_fbc_pre_update(struct intel_crtc *crtc,
 
 	mutex_lock(&fbc->lock);
 
-	if (!multiple_pipes_ok(crtc, plane_state)) {
+	if (!multiple_pipes_ok(crtc)) {
 		fbc->no_fbc_reason = "more than one pipe active";
 		goto deactivate;
 	}
@@ -900,7 +905,7 @@ void intel_fbc_pre_update(struct intel_crtc *crtc,
 	if (!fbc->enabled || fbc->crtc != crtc)
 		goto unlock;
 
-	intel_fbc_update_state_cache(crtc, crtc_state, plane_state);
+	intel_fbc_update_state_cache(crtc);
 
 deactivate:
 	intel_fbc_deactivate(dev_priv);
@@ -1084,9 +1089,7 @@ out:
  * intel_fbc_enable multiple times for the same pipe without an
  * intel_fbc_disable in the middle, as long as it is deactivated.
  */
-void intel_fbc_enable(struct intel_crtc *crtc,
-		      struct intel_crtc_state *crtc_state,
-		      struct intel_plane_state *plane_state)
+void intel_fbc_enable(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	struct intel_fbc *fbc = &dev_priv->fbc;
@@ -1099,19 +1102,19 @@ void intel_fbc_enable(struct intel_crtc *crtc,
 	if (fbc->enabled) {
 		WARN_ON(fbc->crtc == NULL);
 		if (fbc->crtc == crtc) {
-			WARN_ON(!crtc_state->enable_fbc);
+			WARN_ON(!crtc->config->enable_fbc);
 			WARN_ON(fbc->active);
 		}
 		goto out;
 	}
 
-	if (!crtc_state->enable_fbc)
+	if (!crtc->config->enable_fbc)
 		goto out;
 
 	WARN_ON(fbc->active);
 	WARN_ON(fbc->crtc != NULL);
 
-	intel_fbc_update_state_cache(crtc, crtc_state, plane_state);
+	intel_fbc_update_state_cache(crtc);
 	if (intel_fbc_alloc_cfb(crtc)) {
 		fbc->no_fbc_reason = "not enough stolen memory";
 		goto out;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 53715037ab54..5c191a1afaaf 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -260,7 +260,9 @@ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enabl
 	if (enable_execlists == 0)
 		return 0;
 
-	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) && USES_PPGTT(dev_priv))
+	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) &&
+	    USES_PPGTT(dev_priv) &&
+	    i915.use_mmio_flip >= 0)
 		return 1;
 
 	return 0;
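
Reviewer note: the execlists gate above presumes use_mmio_flip is a tri-state module parameter, and the diffstat shows i915_params.c/i915_params.h gaining it back. A declaration consistent with its historical form is sketched below; the exact macro and description wording are assumptions, not part of this patch.

/* i915_params.h (assumed) */
	int use_mmio_flip;

/* i915_params.c (assumed) */
module_param_named_unsafe(use_mmio_flip, i915.use_mmio_flip, int, 0600);
MODULE_PARM_DESC(use_mmio_flip,
	"use MMIO flips (-1=never, 0=driver discretion [default], 1=always)");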