summary refs log tree commit diff
path: root/drivers/gpu/drm
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r--drivers/gpu/drm/drm_gem_shmem_helper.c13
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_mman.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c28
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c9
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.c10
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.c45
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h1
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c23
-rw-r--r--drivers/gpu/drm/i915/i915_gem_gtt.c12
-rw-r--r--drivers/gpu/drm/i915/i915_request.c9
-rw-r--r--drivers/gpu/drm/i915/i915_vgpu.c1
-rw-r--r--drivers/gpu/drm/mcde/mcde_dsi.c70
-rw-r--r--drivers/gpu/drm/msm/Kconfig2
-rw-r--r--drivers/gpu/drm/msm/Makefile1
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_debugfs.c4
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gmu.c2
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_device.c1
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c16
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c95
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h7
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c75
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h11
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c3
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c44
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h1
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c3
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c1
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.h9
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c112
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h10
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c9
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c23
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h2
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c11
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c1
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_irq.c1
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c51
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c2
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c2
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_plane.c2
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c132
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c3
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c4
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_irq.c1
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c60
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c2
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c1
-rw-r--r--drivers/gpu/drm/msm/disp/mdp_format.c2
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_host.c18
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy.c12
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c2
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c2
-rw-r--r--drivers/gpu/drm/msm/dsi/pll/dsi_pll.h2
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi.c66
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi.h4
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_bridge.c2
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_connector.c43
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c1
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_phy_8x60.c2
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c2
-rw-r--r--drivers/gpu/drm/msm/msm_atomic.c232
-rw-r--r--drivers/gpu/drm/msm/msm_atomic_trace.h110
-rw-r--r--drivers/gpu/drm/msm/msm_atomic_tracepoints.c3
-rw-r--r--drivers/gpu/drm/msm/msm_debugfs.c5
-rw-r--r--drivers/gpu/drm/msm/msm_drv.c51
-rw-r--r--drivers/gpu/drm/msm/msm_drv.h6
-rw-r--r--drivers/gpu/drm/msm/msm_fb.c2
-rw-r--r--drivers/gpu/drm/msm/msm_fbdev.c4
-rw-r--r--drivers/gpu/drm/msm/msm_gem.c6
-rw-r--r--drivers/gpu/drm/msm/msm_gem_prime.c6
-rw-r--r--drivers/gpu/drm/msm/msm_gem_submit.c8
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.c2
-rw-r--r--drivers/gpu/drm/msm/msm_gpu_trace.h2
-rw-r--r--drivers/gpu/drm/msm/msm_gpummu.c2
-rw-r--r--drivers/gpu/drm/msm/msm_kms.h108
-rw-r--r--drivers/gpu/drm/msm/msm_perf.c3
-rw-r--r--drivers/gpu/drm/msm/msm_rd.c7
-rw-r--r--drivers/gpu/drm/msm/msm_submitqueue.c2
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_devfreq.c6
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_device.c10
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_device.h3
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_drv.c16
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c11
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_job.c43
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_mmu.c91
85 files changed, 1211 insertions, 613 deletions
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index df8f2c8adb2b..f5918707672f 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -390,6 +390,12 @@ void drm_gem_shmem_purge_locked(struct drm_gem_object *obj)
 
 	WARN_ON(!drm_gem_shmem_is_purgeable(shmem));
 
+	dma_unmap_sg(obj->dev->dev, shmem->sgt->sgl,
+		     shmem->sgt->nents, DMA_BIDIRECTIONAL);
+	sg_free_table(shmem->sgt);
+	kfree(shmem->sgt);
+	shmem->sgt = NULL;
+
 	drm_gem_shmem_put_pages_locked(shmem);
 
 	shmem->madv = -1;
@@ -409,13 +415,16 @@ void drm_gem_shmem_purge_locked(struct drm_gem_object *obj)
 }
 EXPORT_SYMBOL(drm_gem_shmem_purge_locked);
 
-void drm_gem_shmem_purge(struct drm_gem_object *obj)
+bool drm_gem_shmem_purge(struct drm_gem_object *obj)
 {
 	struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
 
-	mutex_lock(&shmem->pages_lock);
+	if (!mutex_trylock(&shmem->pages_lock))
+		return false;
 	drm_gem_shmem_purge_locked(obj);
 	mutex_unlock(&shmem->pages_lock);
+
+	return true;
 }
 EXPORT_SYMBOL(drm_gem_shmem_purge);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 595539a09e38..261c9bd83f51 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -265,7 +265,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 				       PIN_MAPPABLE |
 				       PIN_NONBLOCK /* NOWARN */ |
-				       PIN_NOSEARCH);
+				       PIN_NOEVICT);
 	if (IS_ERR(vma)) {
 		/* Use a partial view if it is bigger than available space */
 		struct i915_ggtt_view view =
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index a372d4ea9370..65b5ca74b394 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -39,27 +39,32 @@ static int __engine_unpark(struct intel_wakeref *wf)
 
 #if IS_ENABLED(CONFIG_LOCKDEP)
 
-static inline void __timeline_mark_lock(struct intel_context *ce)
+static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
 	mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);
-	local_irq_restore(flags);
+
+	return flags;
 }
 
-static inline void __timeline_mark_unlock(struct intel_context *ce)
+static inline void __timeline_mark_unlock(struct intel_context *ce,
+					  unsigned long flags)
 {
 	mutex_release(&ce->timeline->mutex.dep_map, 0, _THIS_IP_);
+	local_irq_restore(flags);
 }
 
 #else
 
-static inline void __timeline_mark_lock(struct intel_context *ce)
+static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
 {
+	return 0;
 }
 
-static inline void __timeline_mark_unlock(struct intel_context *ce)
+static inline void __timeline_mark_unlock(struct intel_context *ce,
+					  unsigned long flags)
 {
 }
 
@@ -68,6 +73,8 @@ static inline void __timeline_mark_unlock(struct intel_context *ce)
 static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 {
 	struct i915_request *rq;
+	unsigned long flags;
+	bool result = true;
 
 	/* Already inside the kernel context, safe to power down. */
 	if (engine->wakeref_serial == engine->serial)
@@ -89,12 +96,12 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	 * retiring the last request, thus all rings should be empty and
 	 * all timelines idle.
 	 */
-	__timeline_mark_lock(engine->kernel_context);
+	flags = __timeline_mark_lock(engine->kernel_context);
 
 	rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT);
 	if (IS_ERR(rq))
 		/* Context switch failed, hope for the best! Maybe reset? */
-		return true;
+		goto out_unlock;
 
 	intel_timeline_enter(rq->timeline);
 
@@ -110,9 +117,10 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	__intel_wakeref_defer_park(&engine->wakeref);
 	__i915_request_queue(rq, NULL);
 
-	__timeline_mark_unlock(engine->kernel_context);
-
-	return false;
+	result = false;
+out_unlock:
+	__timeline_mark_unlock(engine->kernel_context, flags);
+	return result;
 }
 
 static int __engine_park(struct intel_wakeref *wf)
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 077716442c90..b9d84d52e986 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -792,6 +792,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 {
 	struct intel_gt_timelines *timelines = &gt->timelines;
 	struct intel_timeline *tl;
+	unsigned long flags;
 
 	if (!test_bit(I915_WEDGED, &gt->reset.flags))
 		return true;
@@ -811,7 +812,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 	 *
 	 * No more can be submitted until we reset the wedged bit.
 	 */
-	spin_lock(&timelines->lock);
+	spin_lock_irqsave(&timelines->lock, flags);
 	list_for_each_entry(tl, &timelines->active_list, link) {
 		struct i915_request *rq;
 
@@ -819,7 +820,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 		if (!rq)
 			continue;
 
-		spin_unlock(&timelines->lock);
+		spin_unlock_irqrestore(&timelines->lock, flags);
 
 		/*
 		 * All internal dependencies (i915_requests) will have
@@ -832,10 +833,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 		i915_request_put(rq);
 
 		/* Restart iteration after droping lock */
-		spin_lock(&timelines->lock);
+		spin_lock_irqsave(&timelines->lock, flags);
 		tl = list_entry(&timelines->active_list, typeof(*tl), link);
 	}
-	spin_unlock(&timelines->lock);
+	spin_unlock_irqrestore(&timelines->lock, flags);
 
 	intel_gt_sanitize(gt, false);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 02fbe11b671b..9cb01d9828f1 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -337,6 +337,7 @@ int intel_timeline_pin(struct intel_timeline *tl)
 void intel_timeline_enter(struct intel_timeline *tl)
 {
 	struct intel_gt_timelines *timelines = &tl->gt->timelines;
+	unsigned long flags;
 
 	lockdep_assert_held(&tl->mutex);
 
@@ -345,14 +346,15 @@ void intel_timeline_enter(struct intel_timeline *tl)
 		return;
 	GEM_BUG_ON(!tl->active_count); /* overflow? */
 
-	spin_lock(&timelines->lock);
+	spin_lock_irqsave(&timelines->lock, flags);
 	list_add(&tl->link, &timelines->active_list);
-	spin_unlock(&timelines->lock);
+	spin_unlock_irqrestore(&timelines->lock, flags);
 }
 
 void intel_timeline_exit(struct intel_timeline *tl)
 {
 	struct intel_gt_timelines *timelines = &tl->gt->timelines;
+	unsigned long flags;
 
 	lockdep_assert_held(&tl->mutex);
 
@@ -360,9 +362,9 @@ void intel_timeline_exit(struct intel_timeline *tl)
 	if (--tl->active_count)
 		return;
 
-	spin_lock(&timelines->lock);
+	spin_lock_irqsave(&timelines->lock, flags);
 	list_del(&tl->link);
-	spin_unlock(&timelines->lock);
+	spin_unlock_irqrestore(&timelines->lock, flags);
 
 	/*
 	 * Since this timeline is idle, all bariers upon which we were waiting
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 1a28e3666951..37f502ca95ad 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -568,6 +568,16 @@ static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 	return 0;
 }
 
+static void update_vreg_in_ctx(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu *vgpu = workload->vgpu;
+	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
+	u32 ring_base;
+
+	ring_base = dev_priv->engine[workload->ring_id]->mmio_base;
+	vgpu_vreg_t(vgpu, RING_START(ring_base)) = workload->rb_start;
+}
+
 static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
@@ -1016,6 +1026,13 @@ static int workload_thread(void *priv)
 		if (need_force_wake)
 			intel_uncore_forcewake_get(&gvt->dev_priv->uncore,
 					FORCEWAKE_ALL);
+		/*
+		 * Update the vReg of the vGPU which submitted this
+		 * workload. The vGPU may use these registers for checking
+		 * the context state. The value comes from GPU commands
+		 * in this workload.
+		 */
+		update_vreg_in_ctx(workload);
 
 		ret = dispatch_workload(workload);
 
@@ -1438,9 +1455,6 @@ static int prepare_mm(struct intel_vgpu_workload *workload)
 #define same_context(a, b) (((a)->context_id == (b)->context_id) && \
 		((a)->lrca == (b)->lrca))
 
-#define get_last_workload(q) \
-	(list_empty(q) ? NULL : container_of(q->prev, \
-	struct intel_vgpu_workload, list))
 /**
  * intel_vgpu_create_workload - create a vGPU workload
  * @vgpu: a vGPU
@@ -1460,7 +1474,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
 {
 	struct intel_vgpu_submission *s = &vgpu->submission;
 	struct list_head *q = workload_q_head(vgpu, ring_id);
-	struct intel_vgpu_workload *last_workload = get_last_workload(q);
+	struct intel_vgpu_workload *last_workload = NULL;
 	struct intel_vgpu_workload *workload = NULL;
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	u64 ring_context_gpa;
@@ -1486,15 +1500,20 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
 	head &= RB_HEAD_OFF_MASK;
 	tail &= RB_TAIL_OFF_MASK;
 
-	if (last_workload && same_context(&last_workload->ctx_desc, desc)) {
-		gvt_dbg_el("ring id %d cur workload == last\n", ring_id);
-		gvt_dbg_el("ctx head %x real head %lx\n", head,
-				last_workload->rb_tail);
-		/*
-		 * cannot use guest context head pointer here,
-		 * as it might not be updated at this time
-		 */
-		head = last_workload->rb_tail;
+	list_for_each_entry_reverse(last_workload, q, list) {
+
+		if (same_context(&last_workload->ctx_desc, desc)) {
+			gvt_dbg_el("ring id %d cur workload == last\n",
+					ring_id);
+			gvt_dbg_el("ctx head %x real head %lx\n", head,
+					last_workload->rb_tail);
+			/*
+			 * cannot use guest context head pointer here,
+			 * as it might not be updated at this time
+			 */
+			head = last_workload->rb_tail;
+			break;
+		}
 	}
 
 	gvt_dbg_el("ring id %d begin a new workload\n", ring_id);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1d725e0bba40..772154e4073e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -961,6 +961,7 @@ struct i915_frontbuffer_tracking {
 };
 
 struct i915_virtual_gpu {
+	struct mutex lock; /* serialises sending of g2v_notify command pkts */
 	bool active;
 	u32 caps;
 };
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index eb31b69a316a..95e7c52cf8ed 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -889,12 +889,13 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
 
 static long
 wait_for_timelines(struct drm_i915_private *i915,
-		   unsigned int flags, long timeout)
+		   unsigned int wait, long timeout)
 {
 	struct intel_gt_timelines *timelines = &i915->gt.timelines;
 	struct intel_timeline *tl;
+	unsigned long flags;
 
-	spin_lock(&timelines->lock);
+	spin_lock_irqsave(&timelines->lock, flags);
 	list_for_each_entry(tl, &timelines->active_list, link) {
 		struct i915_request *rq;
 
@@ -902,7 +903,7 @@ wait_for_timelines(struct drm_i915_private *i915,
 		if (!rq)
 			continue;
 
-		spin_unlock(&timelines->lock);
+		spin_unlock_irqrestore(&timelines->lock, flags);
 
 		/*
 		 * "Race-to-idle".
@@ -913,19 +914,19 @@ wait_for_timelines(struct drm_i915_private *i915,
 		 * want to complete as quickly as possible to avoid prolonged
 		 * stalls, so allow the gpu to boost to maximum clocks.
 		 */
-		if (flags & I915_WAIT_FOR_IDLE_BOOST)
+		if (wait & I915_WAIT_FOR_IDLE_BOOST)
 			gen6_rps_boost(rq);
 
-		timeout = i915_request_wait(rq, flags, timeout);
+		timeout = i915_request_wait(rq, wait, timeout);
 		i915_request_put(rq);
 		if (timeout < 0)
 			return timeout;
 
 		/* restart after reacquiring the lock */
-		spin_lock(&timelines->lock);
+		spin_lock_irqsave(&timelines->lock, flags);
 		tl = list_entry(&timelines->active_list, typeof(*tl), link);
 	}
-	spin_unlock(&timelines->lock);
+	spin_unlock_irqrestore(&timelines->lock, flags);
 
 	return timeout;
 }
@@ -1026,6 +1027,14 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			return ERR_PTR(ret);
 	}
 
+	if (vma->fence && !i915_gem_object_is_tiled(obj)) {
+		mutex_lock(&vma->vm->mutex);
+		ret = i915_vma_revoke_fence(vma);
+		mutex_unlock(&vma->vm->mutex);
+		if (ret)
+			return ERR_PTR(ret);
+	}
+
 	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
 	if (ret)
 		return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 0b81e0b64393..b1a7a8b9b46a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -827,10 +827,9 @@ static void mark_tlbs_dirty(struct i915_ppgtt *ppgtt)
 	ppgtt->pd_dirty_engines = ALL_ENGINES;
 }
 
-static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
+static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 {
-	struct i915_address_space *vm = &ppgtt->vm;
-	struct drm_i915_private *dev_priv = vm->i915;
+	struct drm_i915_private *dev_priv = ppgtt->vm.i915;
 	enum vgt_g2v_type msg;
 	int i;
 
@@ -839,7 +838,9 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 	else
 		atomic_dec(px_used(ppgtt->pd));
 
-	if (i915_vm_is_4lvl(vm)) {
+	mutex_lock(&dev_priv->vgpu.lock);
+
+	if (i915_vm_is_4lvl(&ppgtt->vm)) {
 		const u64 daddr = px_dma(ppgtt->pd);
 
 		I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
@@ -859,9 +860,10 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 				VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
 	}
 
+	/* g2v_notify atomically (via hv trap) consumes the message packet. */
 	I915_WRITE(vgtif_reg(g2v_notify), msg);
 
-	return 0;
+	mutex_unlock(&dev_priv->vgpu.lock);
 }
 
 /* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index f1a0a57fc6fc..a53777dd371c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1465,9 +1465,10 @@ bool i915_retire_requests(struct drm_i915_private *i915)
 {
 	struct intel_gt_timelines *timelines = &i915->gt.timelines;
 	struct intel_timeline *tl, *tn;
+	unsigned long flags;
 	LIST_HEAD(free);
 
-	spin_lock(&timelines->lock);
+	spin_lock_irqsave(&timelines->lock, flags);
 	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
 		if (!mutex_trylock(&tl->mutex))
 			continue;
@@ -1475,11 +1476,11 @@ bool i915_retire_requests(struct drm_i915_private *i915)
 		intel_timeline_get(tl);
 		GEM_BUG_ON(!tl->active_count);
 		tl->active_count++; /* pin the list element */
-		spin_unlock(&timelines->lock);
+		spin_unlock_irqrestore(&timelines->lock, flags);
 
 		retire_requests(tl);
 
-		spin_lock(&timelines->lock);
+		spin_lock_irqsave(&timelines->lock, flags);
 
 		/* Resume iteration after dropping lock */
 		list_safe_reset_next(tl, tn, link);
@@ -1494,7 +1495,7 @@ bool i915_retire_requests(struct drm_i915_private *i915)
 			list_add(&tl->link, &free);
 		}
 	}
-	spin_unlock(&timelines->lock);
+	spin_unlock_irqrestore(&timelines->lock, flags);
 
 	list_for_each_entry_safe(tl, tn, &free, link)
 		__intel_timeline_free(&tl->kref);
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index 39bebf16edbe..968be26735c5 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -94,6 +94,7 @@ void i915_detect_vgpu(struct drm_i915_private *dev_priv)
 	dev_priv->vgpu.caps = readl(shared_area + vgtif_offset(vgt_caps));
 
 	dev_priv->vgpu.active = true;
+	mutex_init(&dev_priv->vgpu.lock);
 	DRM_INFO("Virtual GPU for Intel GVT-g detected.\n");
 
 out:
diff --git a/drivers/gpu/drm/mcde/mcde_dsi.c b/drivers/gpu/drm/mcde/mcde_dsi.c
index 07f7090d08b3..f9c9e32b299c 100644
--- a/drivers/gpu/drm/mcde/mcde_dsi.c
+++ b/drivers/gpu/drm/mcde/mcde_dsi.c
@@ -178,22 +178,26 @@ static ssize_t mcde_dsi_host_transfer(struct mipi_dsi_host *host,
 	const u32 loop_delay_us = 10; /* us */
 	const u8 *tx = msg->tx_buf;
 	u32 loop_counter;
-	size_t txlen;
+	size_t txlen = msg->tx_len;
+	size_t rxlen = msg->rx_len;
 	u32 val;
 	int ret;
 	int i;
 
-	txlen = msg->tx_len;
-	if (txlen > 12) {
+	if (txlen > 16) {
 		dev_err(d->dev,
-			"dunno how to write more than 12 bytes yet\n");
+			"dunno how to write more than 16 bytes yet\n");
+		return -EIO;
+	}
+	if (rxlen > 4) {
+		dev_err(d->dev,
+			"dunno how to read more than 4 bytes yet\n");
 		return -EIO;
 	}
 
 	dev_dbg(d->dev,
-		"message to channel %d, %zd bytes",
-		msg->channel,
-		txlen);
+		"message to channel %d, write %zd bytes read %zd bytes\n",
+		msg->channel, txlen, rxlen);
 
 	/* Command "nature" */
 	if (MCDE_DSI_HOST_IS_READ(msg->type))
@@ -210,9 +214,7 @@ static ssize_t mcde_dsi_host_transfer(struct mipi_dsi_host *host,
 	if (mipi_dsi_packet_format_is_long(msg->type))
 		val |= DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_LONGNOTSHORT;
 	val |= 0 << DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_ID_SHIFT;
-	/* Add one to the length for the MIPI DCS command */
-	val |= txlen
-		<< DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_SIZE_SHIFT;
+	val |= txlen << DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_SIZE_SHIFT;
 	val |= DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_LP_EN;
 	val |= msg->type << DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_HEAD_SHIFT;
 	writel(val, d->regs + DSI_DIRECT_CMD_MAIN_SETTINGS);
@@ -249,17 +251,36 @@ static ssize_t mcde_dsi_host_transfer(struct mipi_dsi_host *host,
 	writel(1, d->regs + DSI_DIRECT_CMD_SEND);
 
 	loop_counter = 1000 * 1000 / loop_delay_us;
-	while (!(readl(d->regs + DSI_DIRECT_CMD_STS) &
-		 DSI_DIRECT_CMD_STS_WRITE_COMPLETED)
-	       && --loop_counter)
-		usleep_range(loop_delay_us, (loop_delay_us * 3) / 2);
-
-	if (!loop_counter) {
-		dev_err(d->dev, "DSI write timeout!\n");
-		return -ETIME;
+	if (MCDE_DSI_HOST_IS_READ(msg->type)) {
+		/* Read command */
+		while (!(readl(d->regs + DSI_DIRECT_CMD_STS) &
+			 (DSI_DIRECT_CMD_STS_READ_COMPLETED |
+			  DSI_DIRECT_CMD_STS_READ_COMPLETED_WITH_ERR))
+		       && --loop_counter)
+			usleep_range(loop_delay_us, (loop_delay_us * 3) / 2);
+		if (!loop_counter) {
+			dev_err(d->dev, "DSI read timeout!\n");
+			return -ETIME;
+		}
+	} else {
+		/* Writing only */
+		while (!(readl(d->regs + DSI_DIRECT_CMD_STS) &
+			 DSI_DIRECT_CMD_STS_WRITE_COMPLETED)
+		       && --loop_counter)
+			usleep_range(loop_delay_us, (loop_delay_us * 3) / 2);
+
+		if (!loop_counter) {
+			dev_err(d->dev, "DSI write timeout!\n");
+			return -ETIME;
+		}
 	}
 
 	val = readl(d->regs + DSI_DIRECT_CMD_STS);
+	if (val & DSI_DIRECT_CMD_STS_READ_COMPLETED_WITH_ERR) {
+		dev_err(d->dev, "read completed with error\n");
+		writel(1, d->regs + DSI_DIRECT_CMD_RD_INIT);
+		return -EIO;
+	}
 	if (val & DSI_DIRECT_CMD_STS_ACKNOWLEDGE_WITH_ERR_RECEIVED) {
 		val >>= DSI_DIRECT_CMD_STS_ACK_VAL_SHIFT;
 		dev_err(d->dev, "error during transmission: %04x\n",
@@ -269,10 +290,7 @@ static ssize_t mcde_dsi_host_transfer(struct mipi_dsi_host *host,
 
 	if (!MCDE_DSI_HOST_IS_READ(msg->type)) {
 		/* Return number of bytes written */
-		if (mipi_dsi_packet_format_is_long(msg->type))
-			ret = 4 + txlen;
-		else
-			ret = 4;
+		ret = txlen;
 	} else {
 		/* OK this is a read command, get the response */
 		u32 rdsz;
@@ -282,7 +300,13 @@ static ssize_t mcde_dsi_host_transfer(struct mipi_dsi_host *host,
 		rdsz = readl(d->regs + DSI_DIRECT_CMD_RD_PROPERTY);
 		rdsz &= DSI_DIRECT_CMD_RD_PROPERTY_RD_SIZE_MASK;
 		rddat = readl(d->regs + DSI_DIRECT_CMD_RDDAT);
-		for (i = 0; i < 4 && i < rdsz; i++)
+		if (rdsz < rxlen) {
+			dev_err(d->dev, "read error, requested %zd got %d\n",
+				rxlen, rdsz);
+			return -EIO;
+		}
+		/* FIXME: read more than 4 bytes */
+		for (i = 0; i < 4 && i < rxlen; i++)
 			rx[i] = (rddat >> (i * 8)) & 0xff;
 		ret = rdsz;
 	}
diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index 9c37e4de5896..e9160ce39cbb 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -14,11 +14,11 @@ config DRM_MSM
 	select SHMEM
 	select TMPFS
 	select QCOM_SCM if ARCH_QCOM
+	select QCOM_COMMAND_DB if ARCH_QCOM
 	select WANT_DEV_COREDUMP
 	select SND_SOC_HDMI_CODEC if SND_SOC
 	select SYNC_FILE
 	select PM_OPP
-	default y
 	help
 	  DRM/KMS driver for MSM/snapdragon.
 
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 7a05cbf2f820..1579cf0d828f 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -75,6 +75,7 @@ msm-y := \
 	disp/dpu1/dpu_rm.o \
 	disp/dpu1/dpu_vbif.o \
 	msm_atomic.o \
+	msm_atomic_tracepoints.o \
 	msm_debugfs.o \
 	msm_drv.o \
 	msm_fb.o \
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c b/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c
index 9f2dd76bd67a..075ecce4b5e0 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c
@@ -2,9 +2,11 @@
 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
  */
 
-
 #include <linux/types.h>
 #include <linux/debugfs.h>
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_file.h>
 #include <drm/drm_print.h>
 
 #include "a5xx_gpu.h"
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 2ca470eb5cb8..85f14feafdec 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -1172,7 +1172,7 @@ static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu)
 
 static int a6xx_gmu_clocks_probe(struct a6xx_gmu *gmu)
 {
-	int ret = msm_clk_bulk_get(gmu->dev, &gmu->clocks);
+	int ret = devm_clk_bulk_get_all(gmu->dev, &gmu->clocks);
 
 	if (ret < 1)
 		return ret;
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 40133a43960c..0888e0df660d 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -181,6 +181,7 @@ MODULE_FIRMWARE("qcom/a530_zap.b01");
 MODULE_FIRMWARE("qcom/a530_zap.b02");
 MODULE_FIRMWARE("qcom/a630_sqe.fw");
 MODULE_FIRMWARE("qcom/a630_gmu.bin");
+MODULE_FIRMWARE("qcom/a630_zap.mbn");
 
 static inline bool _rev_match(uint8_t entry, uint8_t id)
 {
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
index 5cda96875e03..09a49b59bb5b 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
@@ -214,7 +214,6 @@ static int _dpu_core_perf_crtc_update_bus(struct dpu_kms *kms,
  */
 void dpu_core_perf_crtc_release_bw(struct drm_crtc *crtc)
 {
-	struct drm_crtc *tmp_crtc;
 	struct dpu_crtc *dpu_crtc;
 	struct dpu_crtc_state *dpu_cstate;
 	struct dpu_kms *kms;
@@ -233,22 +232,9 @@ void dpu_core_perf_crtc_release_bw(struct drm_crtc *crtc)
 	dpu_crtc = to_dpu_crtc(crtc);
 	dpu_cstate = to_dpu_crtc_state(crtc->state);
 
-	/* only do this for command mode rt client */
-	if (dpu_crtc_get_intf_mode(crtc) != INTF_MODE_CMD)
+	if (atomic_dec_return(&kms->bandwidth_ref) > 0)
 		return;
 
-	/*
-	 * If video interface present, cmd panel bandwidth cannot be
-	 * released.
-	 */
-	if (dpu_crtc_get_intf_mode(crtc) == INTF_MODE_CMD)
-		drm_for_each_crtc(tmp_crtc, crtc->dev) {
-			if (tmp_crtc->enabled &&
-				dpu_crtc_get_intf_mode(tmp_crtc) ==
-						INTF_MODE_VIDEO)
-				return;
-		}
-
 	/* Release the bandwidth */
 	if (kms->perf.enable_bw_release) {
 		trace_dpu_cmd_release_bw(crtc->base.id);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
index b3417d56032d..ce59adff06aa 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
@@ -9,11 +9,13 @@
 #include <linux/sort.h>
 #include <linux/debugfs.h>
 #include <linux/ktime.h>
+
 #include <drm/drm_crtc.h>
 #include <drm/drm_flip_work.h>
 #include <drm/drm_mode.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/drm_rect.h>
+#include <drm/drm_vblank.h>
 
 #include "dpu_kms.h"
 #include "dpu_hw_lm.h"
@@ -292,19 +294,6 @@ void dpu_crtc_vblank_callback(struct drm_crtc *crtc)
 	trace_dpu_crtc_vblank_cb(DRMID(crtc));
 }
 
-static void dpu_crtc_release_bw_unlocked(struct drm_crtc *crtc)
-{
-	int ret = 0;
-	struct drm_modeset_acquire_ctx ctx;
-
-	DRM_MODESET_LOCK_ALL_BEGIN(crtc->dev, ctx, 0, ret);
-	dpu_core_perf_crtc_release_bw(crtc);
-	DRM_MODESET_LOCK_ALL_END(ctx, ret);
-	if (ret)
-		DRM_ERROR("Failed to acquire modeset locks to release bw, %d\n",
-			  ret);
-}
-
 static void dpu_crtc_frame_event_work(struct kthread_work *work)
 {
 	struct dpu_crtc_frame_event *fevent = container_of(work,
@@ -324,17 +313,12 @@ static void dpu_crtc_frame_event_work(struct kthread_work *work)
 				| DPU_ENCODER_FRAME_EVENT_PANEL_DEAD)) {
 
 		if (atomic_read(&dpu_crtc->frame_pending) < 1) {
-			/* this should not happen */
-			DRM_ERROR("crtc%d ev:%u ts:%lld frame_pending:%d\n",
-					crtc->base.id,
-					fevent->event,
-					ktime_to_ns(fevent->ts),
-					atomic_read(&dpu_crtc->frame_pending));
+			/* ignore vblank when not pending */
 		} else if (atomic_dec_return(&dpu_crtc->frame_pending) == 0) {
 			/* release bandwidth and other resources */
 			trace_dpu_crtc_frame_event_done(DRMID(crtc),
 							fevent->event);
-			dpu_crtc_release_bw_unlocked(crtc);
+			dpu_core_perf_crtc_release_bw(crtc);
 		} else {
 			trace_dpu_crtc_frame_event_more_pending(DRMID(crtc),
 								fevent->event);
@@ -407,13 +391,8 @@ static void dpu_crtc_frame_event_cb(void *data, u32 event)
 	kthread_queue_work(&priv->event_thread[crtc_id].worker, &fevent->work);
 }
 
-void dpu_crtc_complete_commit(struct drm_crtc *crtc,
-		struct drm_crtc_state *old_state)
+void dpu_crtc_complete_commit(struct drm_crtc *crtc)
 {
-	if (!crtc || !crtc->state) {
-		DPU_ERROR("invalid crtc\n");
-		return;
-	}
 	trace_dpu_crtc_complete_commit(DRMID(crtc));
 }
 
@@ -623,13 +602,12 @@ static int _dpu_crtc_wait_for_frame_done(struct drm_crtc *crtc)
 	return rc;
 }
 
-void dpu_crtc_commit_kickoff(struct drm_crtc *crtc, bool async)
+void dpu_crtc_commit_kickoff(struct drm_crtc *crtc)
 {
 	struct drm_encoder *encoder;
 	struct dpu_crtc *dpu_crtc = to_dpu_crtc(crtc);
 	struct dpu_kms *dpu_kms = _dpu_crtc_get_kms(crtc);
 	struct dpu_crtc_state *cstate = to_dpu_crtc_state(crtc->state);
-	int ret;
 
 	/*
 	 * If no mixers has been allocated in dpu_crtc_atomic_check(),
@@ -647,37 +625,22 @@ void dpu_crtc_commit_kickoff(struct drm_crtc *crtc, bool async)
 	 */
 	drm_for_each_encoder_mask(encoder, crtc->dev,
 				  crtc->state->encoder_mask)
-		dpu_encoder_prepare_for_kickoff(encoder, async);
-
-	if (!async) {
-		/* wait for frame_event_done completion */
-		DPU_ATRACE_BEGIN("wait_for_frame_done_event");
-		ret = _dpu_crtc_wait_for_frame_done(crtc);
-		DPU_ATRACE_END("wait_for_frame_done_event");
-		if (ret) {
-			DPU_ERROR("crtc%d wait for frame done failed;frame_pending%d\n",
-					crtc->base.id,
-					atomic_read(&dpu_crtc->frame_pending));
-			goto end;
-		}
+		dpu_encoder_prepare_for_kickoff(encoder);
 
-		if (atomic_inc_return(&dpu_crtc->frame_pending) == 1) {
-			/* acquire bandwidth and other resources */
-			DPU_DEBUG("crtc%d first commit\n", crtc->base.id);
-		} else
-			DPU_DEBUG("crtc%d commit\n", crtc->base.id);
+	if (atomic_inc_return(&dpu_crtc->frame_pending) == 1) {
+		/* acquire bandwidth and other resources */
+		DPU_DEBUG("crtc%d first commit\n", crtc->base.id);
+	} else
+		DPU_DEBUG("crtc%d commit\n", crtc->base.id);
 
-		dpu_crtc->play_count++;
-	}
+	dpu_crtc->play_count++;
 
 	dpu_vbif_clear_errors(dpu_kms);
 
 	drm_for_each_encoder_mask(encoder, crtc->dev, crtc->state->encoder_mask)
-		dpu_encoder_kickoff(encoder, async);
+		dpu_encoder_kickoff(encoder);
 
-end:
-	if (!async)
-		reinit_completion(&dpu_crtc->frame_done_comp);
+	reinit_completion(&dpu_crtc->frame_done_comp);
 	DPU_ATRACE_END("crtc_commit");
 }
 
@@ -729,6 +692,7 @@ static void dpu_crtc_disable(struct drm_crtc *crtc,
 	struct drm_encoder *encoder;
 	struct msm_drm_private *priv;
 	unsigned long flags;
+	bool release_bandwidth = false;
 
 	if (!crtc || !crtc->dev || !crtc->dev->dev_private || !crtc->state) {
 		DPU_ERROR("invalid crtc\n");
@@ -745,8 +709,15 @@ static void dpu_crtc_disable(struct drm_crtc *crtc,
 	drm_crtc_vblank_off(crtc);
 
 	drm_for_each_encoder_mask(encoder, crtc->dev,
-				  old_crtc_state->encoder_mask)
+				  old_crtc_state->encoder_mask) {
+		/* in video mode, we hold an extra bandwidth reference
+		 * as we cannot drop bandwidth at frame-done if any
+		 * crtc is being used in video mode.
+		 */
+		if (dpu_encoder_get_intf_mode(encoder) == INTF_MODE_VIDEO)
+			release_bandwidth = true;
 		dpu_encoder_assign_crtc(encoder, NULL);
+	}
 
 	/* wait for frame_event_done completion */
 	if (_dpu_crtc_wait_for_frame_done(crtc))
@@ -760,7 +731,8 @@ static void dpu_crtc_disable(struct drm_crtc *crtc,
 	if (atomic_read(&dpu_crtc->frame_pending)) {
 		trace_dpu_crtc_disable_frame_pending(DRMID(crtc),
 				     atomic_read(&dpu_crtc->frame_pending));
-		dpu_core_perf_crtc_release_bw(crtc);
+		if (release_bandwidth)
+			dpu_core_perf_crtc_release_bw(crtc);
 		atomic_set(&dpu_crtc->frame_pending, 0);
 	}
 
@@ -792,6 +764,7 @@ static void dpu_crtc_enable(struct drm_crtc *crtc,
 	struct dpu_crtc *dpu_crtc;
 	struct drm_encoder *encoder;
 	struct msm_drm_private *priv;
+	bool request_bandwidth;
 
 	if (!crtc || !crtc->dev || !crtc->dev->dev_private) {
 		DPU_ERROR("invalid crtc\n");
@@ -804,9 +777,19 @@ static void dpu_crtc_enable(struct drm_crtc *crtc,
 	DRM_DEBUG_KMS("crtc%d\n", crtc->base.id);
 	dpu_crtc = to_dpu_crtc(crtc);
 
-	drm_for_each_encoder_mask(encoder, crtc->dev, crtc->state->encoder_mask)
+	drm_for_each_encoder_mask(encoder, crtc->dev, crtc->state->encoder_mask) {
+		/* in video mode, we hold an extra bandwidth reference
+		 * as we cannot drop bandwidth at frame-done if any
+		 * crtc is being used in video mode.
+		 */
+		if (dpu_encoder_get_intf_mode(encoder) == INTF_MODE_VIDEO)
+			request_bandwidth = true;
 		dpu_encoder_register_frame_event_callback(encoder,
 				dpu_crtc_frame_event_cb, (void *)crtc);
+	}
+
+	if (request_bandwidth)
+		atomic_inc(&_dpu_crtc_get_kms(crtc)->bandwidth_ref);
 
 	trace_dpu_crtc_enable(DRMID(crtc), true, dpu_crtc);
 	dpu_crtc->enabled = true;
@@ -981,6 +964,8 @@ static int dpu_crtc_atomic_check(struct drm_crtc *crtc,
 		}
 	}
 
+	atomic_inc(&_dpu_crtc_get_kms(crtc)->bandwidth_ref);
+
 	rc = dpu_core_perf_crtc_check(crtc, state);
 	if (rc) {
 		DPU_ERROR("crtc%d failed performance check %d\n",
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h
index 5181f079a6a1..5174e86124cc 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h
@@ -238,17 +238,14 @@ void dpu_crtc_vblank_callback(struct drm_crtc *crtc);
 /**
  * dpu_crtc_commit_kickoff - trigger kickoff of the commit for this crtc
  * @crtc: Pointer to drm crtc object
- * @async: true if the commit is asynchronous, false otherwise
  */
-void dpu_crtc_commit_kickoff(struct drm_crtc *crtc, bool async);
+void dpu_crtc_commit_kickoff(struct drm_crtc *crtc);
 
 /**
  * dpu_crtc_complete_commit - callback signalling completion of current commit
  * @crtc: Pointer to drm crtc object
- * @old_state: Pointer to drm crtc old state object
  */
-void dpu_crtc_complete_commit(struct drm_crtc *crtc,
-		struct drm_crtc_state *old_state);
+void dpu_crtc_complete_commit(struct drm_crtc *crtc);
 
 /**
  * dpu_crtc_init - create a new crtc object
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 0aa8a12c9952..d82ea994063f 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -6,14 +6,16 @@
  */
 
 #define pr_fmt(fmt)	"[drm:%s:%d] " fmt, __func__, __LINE__
-#include <linux/kthread.h>
 #include <linux/debugfs.h>
+#include <linux/kthread.h>
 #include <linux/seq_file.h>
 
-#include "msm_drv.h"
-#include "dpu_kms.h"
 #include <drm/drm_crtc.h>
+#include <drm/drm_file.h>
 #include <drm/drm_probe_helper.h>
+
+#include "msm_drv.h"
+#include "dpu_kms.h"
 #include "dpu_hwio.h"
 #include "dpu_hw_catalog.h"
 #include "dpu_hw_intf.h"
@@ -1421,19 +1423,12 @@ static void dpu_encoder_off_work(struct work_struct *work)
  * extra_flush_bits: Additional bit mask to include in flush trigger
  */
 static void _dpu_encoder_trigger_flush(struct drm_encoder *drm_enc,
-		struct dpu_encoder_phys *phys, uint32_t extra_flush_bits,
-		bool async)
+		struct dpu_encoder_phys *phys, uint32_t extra_flush_bits)
 {
 	struct dpu_hw_ctl *ctl;
 	int pending_kickoff_cnt;
 	u32 ret = UINT_MAX;
 
-	if (!drm_enc || !phys) {
-		DPU_ERROR("invalid argument(s), drm_enc %d, phys_enc %d\n",
-				drm_enc != 0, phys != 0);
-		return;
-	}
-
 	if (!phys->hw_pp) {
 		DPU_ERROR("invalid pingpong hw\n");
 		return;
@@ -1445,10 +1440,7 @@ static void _dpu_encoder_trigger_flush(struct drm_encoder *drm_enc,
 		return;
 	}
 
-	if (!async)
-		pending_kickoff_cnt = dpu_encoder_phys_inc_pending(phys);
-	else
-		pending_kickoff_cnt = atomic_read(&phys->pending_kickoff_cnt);
+	pending_kickoff_cnt = dpu_encoder_phys_inc_pending(phys);
 
 	if (extra_flush_bits && ctl->ops.update_pending_flush)
 		ctl->ops.update_pending_flush(ctl, extra_flush_bits);
@@ -1559,18 +1551,12 @@ static void dpu_encoder_helper_hw_reset(struct dpu_encoder_phys *phys_enc)
  *	a time.
  * dpu_enc: Pointer to virtual encoder structure
  */
-static void _dpu_encoder_kickoff_phys(struct dpu_encoder_virt *dpu_enc,
-				      bool async)
+static void _dpu_encoder_kickoff_phys(struct dpu_encoder_virt *dpu_enc)
 {
 	struct dpu_hw_ctl *ctl;
 	uint32_t i, pending_flush;
 	unsigned long lock_flags;
 
-	if (!dpu_enc) {
-		DPU_ERROR("invalid encoder\n");
-		return;
-	}
-
 	pending_flush = 0x0;
 
 	/* update pending counts and trigger kickoff ctl flush atomically */
@@ -1592,13 +1578,12 @@ static void _dpu_encoder_kickoff_phys(struct dpu_encoder_virt *dpu_enc,
 		 * for async commits. So don't set this for async, since it'll
 		 * roll over to the next commit.
 		 */
-		if (!async && phys->split_role != ENC_ROLE_SLAVE)
+		if (phys->split_role != ENC_ROLE_SLAVE)
 			set_bit(i, dpu_enc->frame_busy_mask);
 
 		if (!phys->ops.needs_single_flush ||
 				!phys->ops.needs_single_flush(phys))
-			_dpu_encoder_trigger_flush(&dpu_enc->base, phys, 0x0,
-						   async);
+			_dpu_encoder_trigger_flush(&dpu_enc->base, phys, 0x0);
 		else if (ctl->ops.get_pending_flush)
 			pending_flush |= ctl->ops.get_pending_flush(ctl);
 	}
@@ -1608,7 +1593,7 @@ static void _dpu_encoder_kickoff_phys(struct dpu_encoder_virt *dpu_enc,
 		_dpu_encoder_trigger_flush(
 				&dpu_enc->base,
 				dpu_enc->cur_master,
-				pending_flush, async);
+				pending_flush);
 	}
 
 	_dpu_encoder_trigger_start(dpu_enc->cur_master);
@@ -1695,8 +1680,7 @@ static u32 _dpu_encoder_calculate_linetime(struct dpu_encoder_virt *dpu_enc,
 	return line_time;
 }
 
-static int _dpu_encoder_wakeup_time(struct drm_encoder *drm_enc,
-		ktime_t *wakeup_time)
+int dpu_encoder_vsync_time(struct drm_encoder *drm_enc, ktime_t *wakeup_time)
 {
 	struct drm_display_mode *mode;
 	struct dpu_encoder_virt *dpu_enc;
@@ -1783,7 +1767,7 @@ static void dpu_encoder_vsync_event_work_handler(struct kthread_work *work)
 		return;
 	}
 
-	if (_dpu_encoder_wakeup_time(&dpu_enc->base, &wakeup_time))
+	if (dpu_encoder_vsync_time(&dpu_enc->base, &wakeup_time))
 		return;
 
 	trace_dpu_enc_vsync_event_work(DRMID(&dpu_enc->base), wakeup_time);
@@ -1791,17 +1775,13 @@ static void dpu_encoder_vsync_event_work_handler(struct kthread_work *work)
 			nsecs_to_jiffies(ktime_to_ns(wakeup_time)));
 }
 
-void dpu_encoder_prepare_for_kickoff(struct drm_encoder *drm_enc, bool async)
+void dpu_encoder_prepare_for_kickoff(struct drm_encoder *drm_enc)
 {
 	struct dpu_encoder_virt *dpu_enc;
 	struct dpu_encoder_phys *phys;
 	bool needs_hw_reset = false;
 	unsigned int i;
 
-	if (!drm_enc) {
-		DPU_ERROR("invalid args\n");
-		return;
-	}
 	dpu_enc = to_dpu_encoder_virt(drm_enc);
 
 	trace_dpu_enc_prepare_kickoff(DRMID(drm_enc));
@@ -1830,39 +1810,28 @@ void dpu_encoder_prepare_for_kickoff(struct drm_encoder *drm_enc, bool async)
 	}
 }
 
-void dpu_encoder_kickoff(struct drm_encoder *drm_enc, bool async)
+void dpu_encoder_kickoff(struct drm_encoder *drm_enc)
 {
 	struct dpu_encoder_virt *dpu_enc;
 	struct dpu_encoder_phys *phys;
 	ktime_t wakeup_time;
+	unsigned long timeout_ms;
 	unsigned int i;
 
-	if (!drm_enc) {
-		DPU_ERROR("invalid encoder\n");
-		return;
-	}
 	DPU_ATRACE_BEGIN("encoder_kickoff");
 	dpu_enc = to_dpu_encoder_virt(drm_enc);
 
 	trace_dpu_enc_kickoff(DRMID(drm_enc));
 
-	/*
-	 * Asynchronous frames don't handle FRAME_DONE events. As such, they
-	 * shouldn't enable the frame_done watchdog since it will always time
-	 * out.
-	 */
-	if (!async) {
-		unsigned long timeout_ms;
-		timeout_ms = DPU_ENCODER_FRAME_DONE_TIMEOUT_FRAMES * 1000 /
+	timeout_ms = DPU_ENCODER_FRAME_DONE_TIMEOUT_FRAMES * 1000 /
 			drm_mode_vrefresh(&drm_enc->crtc->state->adjusted_mode);
 
-		atomic_set(&dpu_enc->frame_done_timeout_ms, timeout_ms);
-		mod_timer(&dpu_enc->frame_done_timer,
-			  jiffies + msecs_to_jiffies(timeout_ms));
-	}
+	atomic_set(&dpu_enc->frame_done_timeout_ms, timeout_ms);
+	mod_timer(&dpu_enc->frame_done_timer,
+			jiffies + msecs_to_jiffies(timeout_ms));
 
 	/* All phys encs are ready to go, trigger the kickoff */
-	_dpu_encoder_kickoff_phys(dpu_enc, async);
+	_dpu_encoder_kickoff_phys(dpu_enc);
 
 	/* allow phys encs to handle any post-kickoff business */
 	for (i = 0; i < dpu_enc->num_phys_encs; i++) {
@@ -1872,7 +1841,7 @@ void dpu_encoder_kickoff(struct drm_encoder *drm_enc, bool async)
 	}
 
 	if (dpu_enc->disp_info.intf_type == DRM_MODE_ENCODER_DSI &&
-			!_dpu_encoder_wakeup_time(drm_enc, &wakeup_time)) {
+			!dpu_encoder_vsync_time(drm_enc, &wakeup_time)) {
 		trace_dpu_enc_early_kickoff(DRMID(drm_enc),
 					    ktime_to_ms(wakeup_time));
 		mod_timer(&dpu_enc->vsync_event_timer,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h
index a8bf1147fc56..b4913465e602 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h
@@ -68,9 +68,8 @@ void dpu_encoder_register_frame_event_callback(struct drm_encoder *encoder,
  *	Immediately: if no previous commit is outstanding.
  *	Delayed: Block until next trigger can be issued.
  * @encoder:	encoder pointer
- * @async:	true if this is an asynchronous commit
  */
-void dpu_encoder_prepare_for_kickoff(struct drm_encoder *encoder,  bool async);
+void dpu_encoder_prepare_for_kickoff(struct drm_encoder *encoder);
 
 /**
  * dpu_encoder_trigger_kickoff_pending - Clear the flush bits from previous
@@ -83,9 +82,13 @@ void dpu_encoder_trigger_kickoff_pending(struct drm_encoder *encoder);
  * dpu_encoder_kickoff - trigger a double buffer flip of the ctl path
  *	(i.e. ctl flush and start) immediately.
  * @encoder:	encoder pointer
- * @async:	true if this is an asynchronous commit
  */
-void dpu_encoder_kickoff(struct drm_encoder *encoder, bool async);
+void dpu_encoder_kickoff(struct drm_encoder *encoder);
+
+/**
+ * dpu_encoder_wakeup_time - get the time of the next vsync
+ */
+int dpu_encoder_vsync_time(struct drm_encoder *drm_enc, ktime_t *wakeup_time);
 
 /**
  * dpu_encoder_wait_for_event - Waits for encoder events
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
index 1b3ab909f367..2923b63d95fe 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
@@ -720,9 +720,6 @@ static int dpu_encoder_phys_cmd_wait_for_vblank(
 static void dpu_encoder_phys_cmd_handle_post_kickoff(
 		struct dpu_encoder_phys *phys_enc)
 {
-	if (!phys_enc)
-		return;
-
 	/**
 	 * re-enable external TE, either for the first time after enabling
 	 * or if disabled for Autorefresh
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
index 5055a5eec869..b9c84fb4d4a1 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
@@ -324,6 +324,10 @@ static void dpu_encoder_phys_vid_vblank_irq(void *arg, int irq_idx)
 
 	/* Signal any waiting atomic commit thread */
 	wake_up_all(&phys_enc->pending_kickoff_wq);
+
+	phys_enc->parent_ops->handle_frame_done(phys_enc->parent, phys_enc,
+			DPU_ENCODER_FRAME_EVENT_DONE);
+
 	DPU_ATRACE_END("vblank_irq");
 }
 
@@ -483,8 +487,8 @@ static void dpu_encoder_phys_vid_get_hw_resources(
 	hw_res->intfs[phys_enc->intf_idx - INTF_0] = INTF_MODE_VIDEO;
 }
 
-static int _dpu_encoder_phys_vid_wait_for_vblank(
-		struct dpu_encoder_phys *phys_enc, bool notify)
+static int dpu_encoder_phys_vid_wait_for_vblank(
+		struct dpu_encoder_phys *phys_enc)
 {
 	struct dpu_encoder_wait_info wait_info;
 	int ret;
@@ -499,10 +503,6 @@ static int _dpu_encoder_phys_vid_wait_for_vblank(
 	wait_info.timeout_ms = KICKOFF_TIMEOUT_MS;
 
 	if (!dpu_encoder_phys_vid_is_master(phys_enc)) {
-		if (notify && phys_enc->parent_ops->handle_frame_done)
-			phys_enc->parent_ops->handle_frame_done(
-					phys_enc->parent, phys_enc,
-					DPU_ENCODER_FRAME_EVENT_DONE);
 		return 0;
 	}
 
@@ -512,18 +512,29 @@ static int _dpu_encoder_phys_vid_wait_for_vblank(
 
 	if (ret == -ETIMEDOUT) {
 		dpu_encoder_helper_report_irq_timeout(phys_enc, INTR_IDX_VSYNC);
-	} else if (!ret && notify && phys_enc->parent_ops->handle_frame_done)
-		phys_enc->parent_ops->handle_frame_done(
-				phys_enc->parent, phys_enc,
-				DPU_ENCODER_FRAME_EVENT_DONE);
+	}
 
 	return ret;
 }
 
-static int dpu_encoder_phys_vid_wait_for_vblank(
+static int dpu_encoder_phys_vid_wait_for_commit_done(
 		struct dpu_encoder_phys *phys_enc)
 {
-	return _dpu_encoder_phys_vid_wait_for_vblank(phys_enc, true);
+	struct dpu_hw_ctl *hw_ctl = phys_enc->hw_ctl;
+	int ret;
+
+	if (!hw_ctl)
+		return 0;
+
+	ret = wait_event_timeout(phys_enc->pending_kickoff_wq,
+		(hw_ctl->ops.get_flush_register(hw_ctl) == 0),
+		msecs_to_jiffies(50));
+	if (ret <= 0) {
+		DPU_ERROR("vblank timeout\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
 }
 
 static void dpu_encoder_phys_vid_prepare_for_kickoff(
@@ -595,7 +606,7 @@ static void dpu_encoder_phys_vid_disable(struct dpu_encoder_phys *phys_enc)
 	 * scanout buffer) don't latch properly..
 	 */
 	if (dpu_encoder_phys_vid_is_master(phys_enc)) {
-		ret = _dpu_encoder_phys_vid_wait_for_vblank(phys_enc, false);
+		ret = dpu_encoder_phys_vid_wait_for_vblank(phys_enc);
 		if (ret) {
 			atomic_set(&phys_enc->pending_kickoff_cnt, 0);
 			DRM_ERROR("wait disable failed: id:%u intf:%d ret:%d\n",
@@ -612,11 +623,6 @@ static void dpu_encoder_phys_vid_handle_post_kickoff(
 {
 	unsigned long lock_flags;
 
-	if (!phys_enc) {
-		DPU_ERROR("invalid encoder\n");
-		return;
-	}
-
 	/*
 	 * Video mode must flush CTL before enabling timing engine
 	 * Video encoders need to turn on their interfaces now
@@ -681,7 +687,7 @@ static void dpu_encoder_phys_vid_init_ops(struct dpu_encoder_phys_ops *ops)
 	ops->destroy = dpu_encoder_phys_vid_destroy;
 	ops->get_hw_resources = dpu_encoder_phys_vid_get_hw_resources;
 	ops->control_vblank_irq = dpu_encoder_phys_vid_control_vblank_irq;
-	ops->wait_for_commit_done = dpu_encoder_phys_vid_wait_for_vblank;
+	ops->wait_for_commit_done = dpu_encoder_phys_vid_wait_for_commit_done;
 	ops->wait_for_vblank = dpu_encoder_phys_vid_wait_for_vblank;
 	ops->wait_for_tx_complete = dpu_encoder_phys_vid_wait_for_vblank;
 	ops->irq_control = dpu_encoder_phys_vid_irq_control;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
index 90f439812088..ec76b8687a98 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
@@ -9,7 +9,6 @@
 #include <linux/bug.h>
 #include <linux/bitmap.h>
 #include <linux/err.h>
-#include <drm/drmP.h>
 
 /**
  * Max hardware block count: For ex: max 12 SSPP pipes or
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c
index b2f7b0e886b5..179e8d52cadb 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c
@@ -102,9 +102,6 @@ static inline void dpu_hw_ctl_update_pending_flush(struct dpu_hw_ctl *ctx,
 
 static u32 dpu_hw_ctl_get_pending_flush(struct dpu_hw_ctl *ctx)
 {
-	if (!ctx)
-		return 0x0;
-
 	return ctx->pending_flush_mask;
 }
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c
index 71b6987bff1e..27fbeb504362 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c
@@ -7,6 +7,7 @@
 #include <linux/clk/clk-conf.h>
 #include <linux/err.h>
 #include <linux/delay.h>
+#include <linux/of.h>
 
 #include <drm/drm_print.h>
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.h
index 09083e9f06bb..e6b5c772fa3b 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.h
@@ -5,7 +5,6 @@
 #ifndef __DPU_IO_UTIL_H__
 #define __DPU_IO_UTIL_H__
 
-#include <linux/gpio.h>
 #include <linux/platform_device.h>
 #include <linux/types.h>
 
@@ -14,12 +13,6 @@
 #define DEV_WARN(fmt, args...)  pr_warn(fmt, ##args)
 #define DEV_ERR(fmt, args...)   pr_err(fmt, ##args)
 
-struct dss_gpio {
-	unsigned int gpio;
-	unsigned int value;
-	char gpio_name[32];
-};
-
 enum dss_clk_type {
 	DSS_CLK_AHB, /* no set rate. rate controlled through rpm */
 	DSS_CLK_PCLK,
@@ -34,8 +27,6 @@ struct dss_clk {
 };
 
 struct dss_module_power {
-	unsigned int num_gpio;
-	struct dss_gpio *gpio_config;
 	unsigned int num_clk;
 	struct dss_clk *clk_config;
 };
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index bb9d44e7bd26..58b0485dc375 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -7,10 +7,12 @@
 
 #define pr_fmt(fmt)	"[drm:%s:%d] " fmt, __func__, __LINE__
 
-#include <drm/drm_crtc.h>
 #include <linux/debugfs.h>
-#include <linux/of_irq.h>
 #include <linux/dma-buf.h>
+#include <linux/of_irq.h>
+
+#include <drm/drm_crtc.h>
+#include <drm/drm_file.h>
 
 #include "msm_drv.h"
 #include "msm_mmu.h"
@@ -248,6 +250,32 @@ static void dpu_kms_disable_vblank(struct msm_kms *kms, struct drm_crtc *crtc)
 	dpu_crtc_vblank(crtc, false);
 }
 
+static void dpu_kms_enable_commit(struct msm_kms *kms)
+{
+	struct dpu_kms *dpu_kms = to_dpu_kms(kms);
+	pm_runtime_get_sync(&dpu_kms->pdev->dev);
+}
+
+static void dpu_kms_disable_commit(struct msm_kms *kms)
+{
+	struct dpu_kms *dpu_kms = to_dpu_kms(kms);
+	pm_runtime_put_sync(&dpu_kms->pdev->dev);
+}
+
+static ktime_t dpu_kms_vsync_time(struct msm_kms *kms, struct drm_crtc *crtc)
+{
+	struct drm_encoder *encoder;
+
+	drm_for_each_encoder_mask(encoder, crtc->dev, crtc->state->encoder_mask) {
+		ktime_t vsync_time;
+
+		if (dpu_encoder_vsync_time(encoder, &vsync_time) == 0)
+			return vsync_time;
+	}
+
+	return ktime_get();
+}
+
 static void dpu_kms_prepare_commit(struct msm_kms *kms,
 		struct drm_atomic_state *state)
 {
@@ -267,7 +295,6 @@ static void dpu_kms_prepare_commit(struct msm_kms *kms,
 	if (!dev || !dev->dev_private)
 		return;
 	priv = dev->dev_private;
-	pm_runtime_get_sync(&dpu_kms->pdev->dev);
 
 	/* Call prepare_commit for all affected encoders */
 	for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
@@ -278,6 +305,20 @@ static void dpu_kms_prepare_commit(struct msm_kms *kms,
 	}
 }
 
+static void dpu_kms_flush_commit(struct msm_kms *kms, unsigned crtc_mask)
+{
+	struct dpu_kms *dpu_kms = to_dpu_kms(kms);
+	struct drm_crtc *crtc;
+
+	for_each_crtc_mask(dpu_kms->dev, crtc, crtc_mask) {
+		if (!crtc->state->active)
+			continue;
+
+		trace_dpu_kms_commit(DRMID(crtc));
+		dpu_crtc_commit_kickoff(crtc);
+	}
+}
+
 /*
  * Override the encoder enable since we need to setup the inline rotator and do
  * some crtc magic before enabling any bridge that might be present.
@@ -298,52 +339,18 @@ void dpu_kms_encoder_enable(struct drm_encoder *encoder)
 			continue;
 
 		trace_dpu_kms_enc_enable(DRMID(crtc));
-		dpu_crtc_commit_kickoff(crtc, false);
-	}
-}
-
-static void dpu_kms_commit(struct msm_kms *kms, struct drm_atomic_state *state)
-{
-	struct drm_crtc *crtc;
-	struct drm_crtc_state *crtc_state;
-	int i;
-
-	for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
-		/* If modeset is required, kickoff is run in encoder_enable */
-		if (drm_atomic_crtc_needs_modeset(crtc_state))
-			continue;
-
-		if (crtc->state->active) {
-			trace_dpu_kms_commit(DRMID(crtc));
-			dpu_crtc_commit_kickoff(crtc,
-						state->legacy_cursor_update);
-		}
 	}
 }
 
-static void dpu_kms_complete_commit(struct msm_kms *kms,
-		struct drm_atomic_state *old_state)
+static void dpu_kms_complete_commit(struct msm_kms *kms, unsigned crtc_mask)
 {
-	struct dpu_kms *dpu_kms;
-	struct msm_drm_private *priv;
+	struct dpu_kms *dpu_kms = to_dpu_kms(kms);
 	struct drm_crtc *crtc;
-	struct drm_crtc_state *old_crtc_state;
-	int i;
-
-	if (!kms || !old_state)
-		return;
-	dpu_kms = to_dpu_kms(kms);
-
-	if (!dpu_kms->dev || !dpu_kms->dev->dev_private)
-		return;
-	priv = dpu_kms->dev->dev_private;
 
 	DPU_ATRACE_BEGIN("kms_complete_commit");
 
-	for_each_old_crtc_in_state(old_state, crtc, old_crtc_state, i)
-		dpu_crtc_complete_commit(crtc, old_crtc_state);
-
-	pm_runtime_put_sync(&dpu_kms->pdev->dev);
+	for_each_crtc_mask(dpu_kms->dev, crtc, crtc_mask)
+		dpu_crtc_complete_commit(crtc);
 
 	DPU_ATRACE_END("kms_complete_commit");
 }
@@ -389,6 +396,15 @@ static void dpu_kms_wait_for_commit_done(struct msm_kms *kms,
 	}
 }
 
+static void dpu_kms_wait_flush(struct msm_kms *kms, unsigned crtc_mask)
+{
+	struct dpu_kms *dpu_kms = to_dpu_kms(kms);
+	struct drm_crtc *crtc;
+
+	for_each_crtc_mask(dpu_kms->dev, crtc, crtc_mask)
+		dpu_kms_wait_for_commit_done(kms, crtc);
+}
+
 static int _dpu_kms_initialize_dsi(struct drm_device *dev,
 				    struct msm_drm_private *priv,
 				    struct dpu_kms *dpu_kms)
@@ -490,11 +506,6 @@ static int _dpu_kms_drm_obj_init(struct dpu_kms *dpu_kms)
 	int primary_planes_idx = 0, cursor_planes_idx = 0, i, ret;
 	int max_crtc_count;
 
-	if (!dpu_kms || !dpu_kms->dev || !dpu_kms->dev->dev) {
-		DPU_ERROR("invalid dpu_kms\n");
-		return -EINVAL;
-	}
-
 	dev = dpu_kms->dev;
 	priv = dev->dev_private;
 	catalog = dpu_kms->catalog;
@@ -686,10 +697,13 @@ static const struct msm_kms_funcs kms_funcs = {
 	.irq_preinstall  = dpu_irq_preinstall,
 	.irq_uninstall   = dpu_irq_uninstall,
 	.irq             = dpu_irq,
+	.enable_commit   = dpu_kms_enable_commit,
+	.disable_commit  = dpu_kms_disable_commit,
+	.vsync_time      = dpu_kms_vsync_time,
 	.prepare_commit  = dpu_kms_prepare_commit,
-	.commit          = dpu_kms_commit,
+	.flush_commit    = dpu_kms_flush_commit,
+	.wait_flush      = dpu_kms_wait_flush,
 	.complete_commit = dpu_kms_complete_commit,
-	.wait_for_crtc_commit_done = dpu_kms_wait_for_commit_done,
 	.enable_vblank   = dpu_kms_enable_vblank,
 	.disable_vblank  = dpu_kms_disable_vblank,
 	.check_modified_format = dpu_format_check_modified_format,
@@ -800,6 +814,8 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
 		return rc;
 	}
 
+	atomic_set(&dpu_kms->bandwidth_ref, 0);
+
 	dpu_kms->mmio = msm_ioremap(dpu_kms->pdev, "mdp", "mdp");
 	if (IS_ERR(dpu_kms->mmio)) {
 		rc = PTR_ERR(dpu_kms->mmio);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
index 9e40f559c51f..4c889aabdaf9 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
@@ -8,6 +8,8 @@
 #ifndef __DPU_KMS_H__
 #define __DPU_KMS_H__
 
+#include <drm/drm_drv.h>
+
 #include "msm_drv.h"
 #include "msm_kms.h"
 #include "msm_mmu.h"
@@ -120,6 +122,14 @@ struct dpu_kms {
 	struct platform_device *pdev;
 	bool rpm_enabled;
 	struct dss_module_power mp;
+
+	/* reference count bandwidth requests, so we know when we can
+	 * release bandwidth.  Each atomic update increments, and frame-
+	 * done event decrements.  Additionally, for video mode, the
+	 * reference is incremented when crtc is enabled, and decremented
+	 * when disabled.
+	 */
+	atomic_t bandwidth_ref;
 };
 
 struct vsync_info {
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
index 986915bbbc02..29705e773a4b 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
@@ -3,6 +3,10 @@
  * Copyright (c) 2018, The Linux Foundation
  */
 
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqdesc.h>
+#include <linux/irqchip/chained_irq.h>
 #include "dpu_kms.h"
 #include <linux/interconnect.h>
 
@@ -22,7 +26,6 @@ struct dpu_mdss {
 	struct msm_mdss base;
 	void __iomem *mmio;
 	unsigned long mmio_len;
-	u32 hwversion;
 	struct dss_module_power mp;
 	struct dpu_irq_controller irq_controller;
 	struct icc_path *path[2];
@@ -287,10 +290,6 @@ int dpu_mdss_init(struct drm_device *dev)
 
 	dpu_mdss_icc_request_bw(priv->mdss);
 
-	pm_runtime_get_sync(dev->dev);
-	dpu_mdss->hwversion = readl_relaxed(dpu_mdss->mmio);
-	pm_runtime_put_sync(dev->dev);
-
 	return ret;
 
 irq_error:
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
index 8cf0b8a4ed03..58d5acbcfc5c 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
@@ -10,8 +10,9 @@
 #include <linux/debugfs.h>
 #include <linux/dma-buf.h>
 
-#include <drm/drm_damage_helper.h>
 #include <drm/drm_atomic_uapi.h>
+#include <drm/drm_damage_helper.h>
+#include <drm/drm_file.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 
 #include "msm_drv.h"
@@ -1036,8 +1037,21 @@ static void dpu_plane_sspp_atomic_update(struct drm_plane *plane)
 				pstate->multirect_mode);
 
 	if (pdpu->pipe_hw->ops.setup_format) {
+		unsigned int rotation;
+
 		src_flags = 0x0;
 
+		rotation = drm_rotation_simplify(state->rotation,
+						 DRM_MODE_ROTATE_0 |
+						 DRM_MODE_REFLECT_X |
+						 DRM_MODE_REFLECT_Y);
+
+		if (rotation & DRM_MODE_REFLECT_X)
+			src_flags |= DPU_SSPP_FLIP_LR;
+
+		if (rotation & DRM_MODE_REFLECT_Y)
+			src_flags |= DPU_SSPP_FLIP_UD;
+
 		/* update format */
 		pdpu->pipe_hw->ops.setup_format(pdpu->pipe_hw, fmt, src_flags,
 				pstate->multirect_index);
@@ -1518,6 +1532,13 @@ struct drm_plane *dpu_plane_init(struct drm_device *dev,
 	if (ret)
 		DPU_ERROR("failed to install zpos property, rc = %d\n", ret);
 
+	drm_plane_create_rotation_property(plane,
+			DRM_MODE_ROTATE_0,
+			DRM_MODE_ROTATE_0 |
+			DRM_MODE_ROTATE_180 |
+			DRM_MODE_REFLECT_X |
+			DRM_MODE_REFLECT_Y);
+
 	drm_plane_enable_fb_damage_clips(plane);
 
 	/* success! finalize initialization */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h
index 765484437d11..eecfe9b3199e 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h
@@ -392,7 +392,7 @@ TRACE_EVENT(dpu_enc_rc,
 		__entry->rc_state = rc_state;
 		__assign_str(stage_str, stage);
 	),
-	TP_printk("%s: id:%u, sw_event:%d, idle_pc_supported:%s, rc_state:%d\n",
+	TP_printk("%s: id:%u, sw_event:%d, idle_pc_supported:%s, rc_state:%d",
 		  __get_str(stage_str), __entry->drm_id, __entry->sw_event,
 		  __entry->idle_pc_supported ? "true" : "false",
 		  __entry->rc_state)
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c
index 8bc3aea7cd86..8d24b79fd400 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c
@@ -5,6 +5,7 @@
 #define pr_fmt(fmt)	"[drm:%s:%d] " fmt, __func__, __LINE__
 
 #include <linux/debugfs.h>
+#include <linux/delay.h>
 
 #include "dpu_vbif.h"
 #include "dpu_hw_vbif.h"
@@ -264,11 +265,6 @@ void dpu_vbif_clear_errors(struct dpu_kms *dpu_kms)
 	struct dpu_hw_vbif *vbif;
 	u32 i, pnd, src;
 
-	if (!dpu_kms) {
-		DPU_ERROR("invalid argument\n");
-		return;
-	}
-
 	for (i = 0; i < ARRAY_SIZE(dpu_kms->hw_vbif); i++) {
 		vbif = dpu_kms->hw_vbif[i];
 		if (vbif && vbif->ops.clear_errors) {
@@ -286,11 +282,6 @@ void dpu_vbif_init_memtypes(struct dpu_kms *dpu_kms)
 	struct dpu_hw_vbif *vbif;
 	int i, j;
 
-	if (!dpu_kms) {
-		DPU_ERROR("invalid argument\n");
-		return;
-	}
-
 	for (i = 0; i < ARRAY_SIZE(dpu_kms->hw_vbif); i++) {
 		vbif = dpu_kms->hw_vbif[i];
 		if (vbif && vbif->cap && vbif->ops.set_mem_type) {
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c
index 668c41975d74..f34dca5d4532 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c
@@ -8,6 +8,7 @@
 #include <drm/drm_flip_work.h>
 #include <drm/drm_mode.h>
 #include <drm/drm_probe_helper.h>
+#include <drm/drm_vblank.h>
 
 #include "mdp4_kms.h"
 
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_irq.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_irq.c
index 62fbca302ac2..4d49f3ba6a96 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_irq.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_irq.c
@@ -5,6 +5,7 @@
  */
 
 #include <drm/drm_print.h>
+#include <drm/drm_vblank.h>
 
 #include "msm_drv.h"
 #include "mdp4_kms.h"
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
index 7a9ab55b4608..50711ccc8691 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
@@ -4,6 +4,9 @@
  * Author: Rob Clark <robdclark@gmail.com>
  */
 
+#include <linux/delay.h>
+
+#include <drm/drm_vblank.h>
 
 #include "msm_drv.h"
 #include "msm_gem.h"
@@ -93,40 +96,51 @@ out:
 	return ret;
 }
 
-static void mdp4_prepare_commit(struct msm_kms *kms, struct drm_atomic_state *state)
+static void mdp4_enable_commit(struct msm_kms *kms)
+{
+	struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms));
+	mdp4_enable(mdp4_kms);
+}
+
+static void mdp4_disable_commit(struct msm_kms *kms)
 {
 	struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms));
+	mdp4_disable(mdp4_kms);
+}
+
+static void mdp4_prepare_commit(struct msm_kms *kms, struct drm_atomic_state *state)
+{
 	int i;
 	struct drm_crtc *crtc;
 	struct drm_crtc_state *crtc_state;
 
-	mdp4_enable(mdp4_kms);
-
 	/* see 119ecb7fd */
 	for_each_new_crtc_in_state(state, crtc, crtc_state, i)
 		drm_crtc_vblank_get(crtc);
 }
 
-static void mdp4_complete_commit(struct msm_kms *kms, struct drm_atomic_state *state)
+static void mdp4_flush_commit(struct msm_kms *kms, unsigned crtc_mask)
+{
+	/* TODO */
+}
+
+static void mdp4_wait_flush(struct msm_kms *kms, unsigned crtc_mask)
 {
 	struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms));
-	int i;
 	struct drm_crtc *crtc;
-	struct drm_crtc_state *crtc_state;
 
-	drm_atomic_helper_wait_for_vblanks(mdp4_kms->dev, state);
-
-	/* see 119ecb7fd */
-	for_each_new_crtc_in_state(state, crtc, crtc_state, i)
-		drm_crtc_vblank_put(crtc);
-
-	mdp4_disable(mdp4_kms);
+	for_each_crtc_mask(mdp4_kms->dev, crtc, crtc_mask)
+		mdp4_crtc_wait_for_commit_done(crtc);
 }
 
-static void mdp4_wait_for_crtc_commit_done(struct msm_kms *kms,
-						struct drm_crtc *crtc)
+static void mdp4_complete_commit(struct msm_kms *kms, unsigned crtc_mask)
 {
-	mdp4_crtc_wait_for_commit_done(crtc);
+	struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms));
+	struct drm_crtc *crtc;
+
+	/* see 119ecb7fd */
+	for_each_crtc_mask(mdp4_kms->dev, crtc, crtc_mask)
+		drm_crtc_vblank_put(crtc);
 }
 
 static long mdp4_round_pixclk(struct msm_kms *kms, unsigned long rate,
@@ -178,9 +192,12 @@ static const struct mdp_kms_funcs kms_funcs = {
 		.irq             = mdp4_irq,
 		.enable_vblank   = mdp4_enable_vblank,
 		.disable_vblank  = mdp4_disable_vblank,
+		.enable_commit   = mdp4_enable_commit,
+		.disable_commit  = mdp4_disable_commit,
 		.prepare_commit  = mdp4_prepare_commit,
+		.flush_commit    = mdp4_flush_commit,
+		.wait_flush      = mdp4_wait_flush,
 		.complete_commit = mdp4_complete_commit,
-		.wait_for_crtc_commit_done = mdp4_wait_for_crtc_commit_done,
 		.get_format      = mdp_get_format,
 		.round_pixclk    = mdp4_round_pixclk,
 		.destroy         = mdp4_destroy,
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c
index 62e2ebe455ea..871f3514ef69 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c
@@ -5,6 +5,8 @@
  * Author: Vinay Simha <vinaysimha@inforcecomputing.com>
  */
 
+#include <linux/delay.h>
+
 #include <drm/drm_crtc.h>
 #include <drm/drm_probe_helper.h>
 
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c
index ecef4f5b9f26..9262ed2dc8c3 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c
@@ -5,8 +5,6 @@
  * Author: Vinay Simha <vinaysimha@inforcecomputing.com>
  */
 
-#include <linux/gpio.h>
-
 #include "mdp4_kms.h"
 
 struct mdp4_lvds_connector {
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_plane.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_plane.c
index e3010f023371..da3cc1d8c331 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_plane.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_plane.c
@@ -5,6 +5,8 @@
  */
 
 #include <drm/drm_damage_helper.h>
+#include <drm/drm_fourcc.h>
+
 #include "mdp4_kms.h"
 
 #define DOWN_SCALE_MAX	8
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c
index dd1daf0e305a..f6e71ff539ca 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c
@@ -630,7 +630,115 @@ const struct mdp5_cfg_hw msm8917_config = {
 	.max_clk = 320000000,
 };
 
-static const struct mdp5_cfg_handler cfg_handlers[] = {
+const struct mdp5_cfg_hw msm8998_config = {
+	.name = "msm8998",
+	.mdp = {
+		.count = 1,
+		.caps = MDP_CAP_DSC |
+			MDP_CAP_CDM |
+			MDP_CAP_SRC_SPLIT |
+			0,
+	},
+	.ctl = {
+		.count = 5,
+		.base = { 0x01000, 0x01200, 0x01400, 0x01600, 0x01800 },
+		.flush_hw_mask = 0xf7ffffff,
+	},
+	.pipe_vig = {
+		.count = 4,
+		.base = { 0x04000, 0x06000, 0x08000, 0x0a000 },
+		.caps = MDP_PIPE_CAP_HFLIP	|
+			MDP_PIPE_CAP_VFLIP	|
+			MDP_PIPE_CAP_SCALE	|
+			MDP_PIPE_CAP_CSC	|
+			MDP_PIPE_CAP_DECIMATION	|
+			MDP_PIPE_CAP_SW_PIX_EXT	|
+			0,
+	},
+	.pipe_rgb = {
+		.count = 4,
+		.base = { 0x14000, 0x16000, 0x18000, 0x1a000 },
+		.caps = MDP_PIPE_CAP_HFLIP	|
+			MDP_PIPE_CAP_VFLIP	|
+			MDP_PIPE_CAP_SCALE	|
+			MDP_PIPE_CAP_DECIMATION	|
+			MDP_PIPE_CAP_SW_PIX_EXT	|
+			0,
+	},
+	.pipe_dma = {
+		.count = 2, /* driver supports max of 2 currently */
+		.base = { 0x24000, 0x26000, 0x28000, 0x2a000 },
+		.caps = MDP_PIPE_CAP_HFLIP	|
+			MDP_PIPE_CAP_VFLIP	|
+			MDP_PIPE_CAP_SW_PIX_EXT	|
+			0,
+	},
+	.pipe_cursor = {
+		.count = 2,
+		.base = { 0x34000, 0x36000 },
+		.caps = MDP_PIPE_CAP_HFLIP	|
+			MDP_PIPE_CAP_VFLIP	|
+			MDP_PIPE_CAP_SW_PIX_EXT	|
+			MDP_PIPE_CAP_CURSOR	|
+			0,
+	},
+
+	.lm = {
+		.count = 6,
+		.base = { 0x44000, 0x45000, 0x46000, 0x47000, 0x48000, 0x49000 },
+		.instances = {
+				{ .id = 0, .pp = 0, .dspp = 0,
+				  .caps = MDP_LM_CAP_DISPLAY |
+					  MDP_LM_CAP_PAIR, },
+				{ .id = 1, .pp = 1, .dspp = 1,
+				  .caps = MDP_LM_CAP_DISPLAY, },
+				{ .id = 2, .pp = 2, .dspp = -1,
+				  .caps = MDP_LM_CAP_DISPLAY |
+					  MDP_LM_CAP_PAIR, },
+				{ .id = 3, .pp = -1, .dspp = -1,
+				  .caps = MDP_LM_CAP_WB, },
+				{ .id = 4, .pp = -1, .dspp = -1,
+				  .caps = MDP_LM_CAP_WB, },
+				{ .id = 5, .pp = 3, .dspp = -1,
+				  .caps = MDP_LM_CAP_DISPLAY, },
+			     },
+		.nb_stages = 8,
+		.max_width = 2560,
+		.max_height = 0xFFFF,
+	},
+	.dspp = {
+		.count = 2,
+		.base = { 0x54000, 0x56000 },
+	},
+	.ad = {
+		.count = 3,
+		.base = { 0x78000, 0x78800, 0x79000 },
+	},
+	.pp = {
+		.count = 4,
+		.base = { 0x70000, 0x70800, 0x71000, 0x71800 },
+	},
+	.cdm = {
+		.count = 1,
+		.base = { 0x79200 },
+	},
+	.dsc = {
+		.count = 2,
+		.base = { 0x80000, 0x80400 },
+	},
+	.intf = {
+		.base = { 0x6a000, 0x6a800, 0x6b000, 0x6b800, 0x6c000 },
+		.connect = {
+			[0] = INTF_eDP,
+			[1] = INTF_DSI,
+			[2] = INTF_DSI,
+			[3] = INTF_HDMI,
+		},
+	},
+	.max_clk = 412500000,
+};
+
+static const struct mdp5_cfg_handler cfg_handlers_v1[] = {
 	{ .revision = 0, .config = { .hw = &msm8x74v1_config } },
 	{ .revision = 2, .config = { .hw = &msm8x74v2_config } },
 	{ .revision = 3, .config = { .hw = &apq8084_config } },
@@ -640,6 +748,10 @@ static const struct mdp5_cfg_handler cfg_handlers[] = {
 	{ .revision = 15, .config = { .hw = &msm8917_config } },
 };
 
+static const struct mdp5_cfg_handler cfg_handlers_v3[] = {
+	{ .revision = 0, .config = { .hw = &msm8998_config } },
+};
+
 static struct mdp5_cfg_platform *mdp5_get_config(struct platform_device *dev);
 
 const struct mdp5_cfg_hw *mdp5_cfg_get_hw_config(struct mdp5_cfg_handler *cfg_handler)
@@ -668,8 +780,9 @@ struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms,
 	struct drm_device *dev = mdp5_kms->dev;
 	struct platform_device *pdev = to_platform_device(dev->dev);
 	struct mdp5_cfg_handler *cfg_handler;
+	const struct mdp5_cfg_handler *cfg_handlers;
 	struct mdp5_cfg_platform *pconfig;
-	int i, ret = 0;
+	int i, ret = 0, num_handlers;
 
 	cfg_handler = kzalloc(sizeof(*cfg_handler), GFP_KERNEL);
 	if (unlikely(!cfg_handler)) {
@@ -677,15 +790,24 @@ struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms,
 		goto fail;
 	}
 
-	if (major != 1) {
+	switch (major) {
+	case 1:
+		cfg_handlers = cfg_handlers_v1;
+		num_handlers = ARRAY_SIZE(cfg_handlers_v1);
+		break;
+	case 3:
+		cfg_handlers = cfg_handlers_v3;
+		num_handlers = ARRAY_SIZE(cfg_handlers_v3);
+		break;
+	default:
 		DRM_DEV_ERROR(dev->dev, "unexpected MDP major version: v%d.%d\n",
 				major, minor);
 		ret = -ENXIO;
 		goto fail;
-	}
+	};
 
 	/* only after mdp5_cfg global pointer's init can we access the hw */
-	for (i = 0; i < ARRAY_SIZE(cfg_handlers); i++) {
+	for (i = 0; i < num_handlers; i++) {
 		if (cfg_handlers[i].revision != minor)
 			continue;
 		mdp5_cfg = cfg_handlers[i].config.hw;
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
index 78d5fa230c16..eb0b4b7dc7cc 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
@@ -6,10 +6,13 @@
  */
 
 #include <linux/sort.h>
+
 #include <drm/drm_mode.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_flip_work.h>
+#include <drm/drm_fourcc.h>
 #include <drm/drm_probe_helper.h>
+#include <drm/drm_vblank.h>
 
 #include "mdp5_kms.h"
 
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c
index 4804cf40de14..030279d7b64b 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c
@@ -253,7 +253,7 @@ int mdp5_ctl_set_cursor(struct mdp5_ctl *ctl, struct mdp5_pipeline *pipeline,
 	u32 blend_cfg;
 	struct mdp5_hw_mixer *mixer = pipeline->mixer;
 
-	if (unlikely(WARN_ON(!mixer))) {
+	if (WARN_ON(!mixer)) {
 		DRM_DEV_ERROR(ctl_mgr->dev->dev, "CTL %d cannot find LM",
 			ctl->id);
 		return -EINVAL;
@@ -695,7 +695,7 @@ struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev,
 		goto fail;
 	}
 
-	if (unlikely(WARN_ON(ctl_cfg->count > MAX_CTL))) {
+	if (WARN_ON(ctl_cfg->count > MAX_CTL)) {
 		DRM_DEV_ERROR(dev->dev, "Increase static pool size to at least %d\n",
 				ctl_cfg->count);
 		ret = -ENOSPC;
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_irq.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_irq.c
index 58db08a2abfa..9b4c8d92ff32 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_irq.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_irq.c
@@ -7,6 +7,7 @@
 #include <linux/irq.h>
 
 #include <drm/drm_print.h>
+#include <drm/drm_vblank.h>
 
 #include "msm_drv.h"
 #include "mdp5_kms.h"
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
index fec6ef1ae3b9..91cd76a2bab1 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
@@ -5,9 +5,15 @@
  * Author: Rob Clark <robdclark@gmail.com>
  */
 
+#include <linux/delay.h>
 #include <linux/interconnect.h>
 #include <linux/of_irq.h>
 
+#include <drm/drm_debugfs.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_vblank.h>
+
 #include "msm_drv.h"
 #include "msm_gem.h"
 #include "msm_mmu.h"
@@ -140,40 +146,52 @@ static int mdp5_global_obj_init(struct mdp5_kms *mdp5_kms)
 	return 0;
 }
 
+static void mdp5_enable_commit(struct msm_kms *kms)
+{
+	struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms));
+	pm_runtime_get_sync(&mdp5_kms->pdev->dev);
+}
+
+static void mdp5_disable_commit(struct msm_kms *kms)
+{
+	struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms));
+	pm_runtime_put_sync(&mdp5_kms->pdev->dev);
+}
+
 static void mdp5_prepare_commit(struct msm_kms *kms, struct drm_atomic_state *state)
 {
 	struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms));
-	struct device *dev = &mdp5_kms->pdev->dev;
 	struct mdp5_global_state *global_state;
 
 	global_state = mdp5_get_existing_global_state(mdp5_kms);
 
-	pm_runtime_get_sync(dev);
-
 	if (mdp5_kms->smp)
 		mdp5_smp_prepare_commit(mdp5_kms->smp, &global_state->smp);
 }
 
-static void mdp5_complete_commit(struct msm_kms *kms, struct drm_atomic_state *state)
+static void mdp5_flush_commit(struct msm_kms *kms, unsigned crtc_mask)
+{
+	/* TODO */
+}
+
+static void mdp5_wait_flush(struct msm_kms *kms, unsigned crtc_mask)
 {
 	struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms));
-	struct device *dev = &mdp5_kms->pdev->dev;
-	struct mdp5_global_state *global_state;
+	struct drm_crtc *crtc;
 
-	drm_atomic_helper_wait_for_vblanks(mdp5_kms->dev, state);
+	for_each_crtc_mask(mdp5_kms->dev, crtc, crtc_mask)
+		mdp5_crtc_wait_for_commit_done(crtc);
+}
+
+static void mdp5_complete_commit(struct msm_kms *kms, unsigned crtc_mask)
+{
+	struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms));
+	struct mdp5_global_state *global_state;
 
 	global_state = mdp5_get_existing_global_state(mdp5_kms);
 
 	if (mdp5_kms->smp)
 		mdp5_smp_complete_commit(mdp5_kms->smp, &global_state->smp);
-
-	pm_runtime_put_sync(dev);
-}
-
-static void mdp5_wait_for_crtc_commit_done(struct msm_kms *kms,
-						struct drm_crtc *crtc)
-{
-	mdp5_crtc_wait_for_commit_done(crtc);
 }
 
 static long mdp5_round_pixclk(struct msm_kms *kms, unsigned long rate,
@@ -271,9 +289,12 @@ static const struct mdp_kms_funcs kms_funcs = {
 		.irq             = mdp5_irq,
 		.enable_vblank   = mdp5_enable_vblank,
 		.disable_vblank  = mdp5_disable_vblank,
+		.flush_commit    = mdp5_flush_commit,
+		.enable_commit   = mdp5_enable_commit,
+		.disable_commit  = mdp5_disable_commit,
 		.prepare_commit  = mdp5_prepare_commit,
+		.wait_flush      = mdp5_wait_flush,
 		.complete_commit = mdp5_complete_commit,
-		.wait_for_crtc_commit_done = mdp5_wait_for_crtc_commit_done,
 		.get_format      = mdp_get_format,
 		.round_pixclk    = mdp5_round_pixclk,
 		.set_split_display = mdp5_set_split_display,
@@ -663,6 +684,7 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev)
 	struct msm_kms *kms;
 	struct msm_gem_address_space *aspace;
 	int irq, i, ret;
+	struct device *iommu_dev;
 
 	/* priv->kms would have been populated by the MDP5 driver */
 	kms = priv->kms;
@@ -702,7 +724,11 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev)
 	mdelay(16);
 
 	if (config->platform.iommu) {
-		aspace = msm_gem_address_space_create(&pdev->dev,
+		iommu_dev = &pdev->dev;
+		if (!iommu_dev->iommu_fwspec)
+			iommu_dev = iommu_dev->parent;
+
+		aspace = msm_gem_address_space_create(iommu_dev,
 				config->platform.iommu, "mdp5");
 		if (IS_ERR(aspace)) {
 			ret = PTR_ERR(aspace);
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c
index c7e6725693ea..83423092de2f 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c
@@ -6,7 +6,9 @@
  */
 
 #include <drm/drm_damage_helper.h>
+#include <drm/drm_fourcc.h>
 #include <drm/drm_print.h>
+
 #include "mdp5_kms.h"
 
 struct mdp5_plane {
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c
index 776337f85a68..b31cfb554fa2 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c
@@ -5,6 +5,7 @@
  * Author: Rob Clark <robdclark@gmail.com>
  */
 
+#include <drm/drm_fourcc.h>
 #include <drm/drm_util.h>
 
 #include "mdp5_kms.h"
diff --git a/drivers/gpu/drm/msm/disp/mdp_format.c b/drivers/gpu/drm/msm/disp/mdp_format.c
index 8afb0f9c04bb..5495d8b3f5b9 100644
--- a/drivers/gpu/drm/msm/disp/mdp_format.c
+++ b/drivers/gpu/drm/msm/disp/mdp_format.c
@@ -174,7 +174,7 @@ const struct msm_format *mdp_get_format(struct msm_kms *kms, uint32_t format,
 
 struct csc_cfg *mdp_get_default_csc_cfg(enum csc_type type)
 {
-	if (unlikely(WARN_ON(type >= CSC_MAX)))
+	if (WARN_ON(type >= CSC_MAX))
 		return NULL;
 
 	return &csc_convert[type];
diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c
index aa35d18ab43c..663ff9f4fac9 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_host.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
@@ -5,19 +5,19 @@
 
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/dma-mapping.h>
 #include <linux/err.h>
-#include <linux/gpio.h>
 #include <linux/gpio/consumer.h>
 #include <linux/interrupt.h>
+#include <linux/mfd/syscon.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
+#include <linux/of_graph.h>
 #include <linux/of_irq.h>
 #include <linux/pinctrl/consumer.h>
-#include <linux/of_graph.h>
+#include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
 #include <linux/spinlock.h>
-#include <linux/mfd/syscon.h>
-#include <linux/regmap.h>
+
 #include <video/mipi_display.h>
 
 #include "dsi.h"
@@ -421,15 +421,15 @@ static int dsi_clk_init(struct msm_dsi_host *msm_host)
 	}
 
 	msm_host->byte_clk_src = clk_get_parent(msm_host->byte_clk);
-	if (!msm_host->byte_clk_src) {
-		ret = -ENODEV;
+	if (IS_ERR(msm_host->byte_clk_src)) {
+		ret = PTR_ERR(msm_host->byte_clk_src);
 		pr_err("%s: can't find byte_clk clock. ret=%d\n", __func__, ret);
 		goto exit;
 	}
 
 	msm_host->pixel_clk_src = clk_get_parent(msm_host->pixel_clk);
-	if (!msm_host->pixel_clk_src) {
-		ret = -ENODEV;
+	if (IS_ERR(msm_host->pixel_clk_src)) {
+		ret = PTR_ERR(msm_host->pixel_clk_src);
 		pr_err("%s: can't find pixel_clk clock. ret=%d\n", __func__, ret);
 		goto exit;
 	}
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
index 4097eca1b3ef..3522863a4984 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
@@ -396,8 +396,12 @@ static int dsi_phy_regulator_init(struct msm_dsi_phy *phy)
 
 	ret = devm_regulator_bulk_get(dev, num, s);
 	if (ret < 0) {
-		DRM_DEV_ERROR(dev, "%s: failed to init regulator, ret=%d\n",
-						__func__, ret);
+		if (ret != -EPROBE_DEFER) {
+			DRM_DEV_ERROR(dev,
+				      "%s: failed to init regulator, ret=%d\n",
+				      __func__, ret);
+		}
+
 		return ret;
 	}
 
@@ -584,10 +588,8 @@ static int dsi_phy_driver_probe(struct platform_device *pdev)
 	}
 
 	ret = dsi_phy_regulator_init(phy);
-	if (ret) {
-		DRM_DEV_ERROR(dev, "%s: failed to init regulator\n", __func__);
+	if (ret)
 		goto fail;
-	}
 
 	phy->ahb_clk = msm_clk_get(pdev, "iface");
 	if (IS_ERR(phy->ahb_clk)) {
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
index c3a61876470f..1594f1422372 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
@@ -3,6 +3,8 @@
  * Copyright (c) 2016, The Linux Foundation. All rights reserved.
  */
 
+#include <linux/delay.h>
+
 #include "dsi_phy.h"
 #include "dsi.xml.h"
 
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
index a198f51d47b4..f22583353957 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
@@ -3,6 +3,8 @@
  * Copyright (c) 2012-2015, The Linux Foundation. All rights reserved.
  */
 
+#include <linux/delay.h>
+
 #include "dsi_phy.h"
 #include "dsi.xml.h"
 
diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h b/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h
index 118bebe53de3..c6a3623f905d 100644
--- a/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h
+++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h
@@ -6,8 +6,8 @@
 #ifndef __DSI_PLL_H__
 #define __DSI_PLL_H__
 
-#include <linux/clk.h>
 #include <linux/clk-provider.h>
+#include <linux/delay.h>
 
 #include "dsi.h"
 
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c
index 0e4217be3f00..355afb936401 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.c
@@ -425,38 +425,6 @@ static const struct {
 	{ "qcom,hdmi-tx-mux-lpm", true, 1, "HDMI_MUX_LPM" },
 };
 
-static int msm_hdmi_get_gpio(struct device_node *of_node, const char *name)
-{
-	int gpio;
-
-	/* try with the gpio names as in the table (downstream bindings) */
-	gpio = of_get_named_gpio(of_node, name, 0);
-	if (gpio < 0) {
-		char name2[32];
-
-		/* try with the gpio names as in the upstream bindings */
-		snprintf(name2, sizeof(name2), "%s-gpios", name);
-		gpio = of_get_named_gpio(of_node, name2, 0);
-		if (gpio < 0) {
-			char name3[32];
-
-			/*
-			 * try again after stripping out the "qcom,hdmi-tx"
-			 * prefix. This is mainly to match "hpd-gpios" used
-			 * in the upstream bindings
-			 */
-			if (sscanf(name2, "qcom,hdmi-tx-%s", name3))
-				gpio = of_get_named_gpio(of_node, name3, 0);
-		}
-
-		if (gpio < 0) {
-			DBG("failed to get gpio: %s (%d)", name, gpio);
-			gpio = -1;
-		}
-	}
-	return gpio;
-}
-
 /*
  * HDMI audio codec callbacks
  */
@@ -582,11 +550,39 @@ static int msm_hdmi_bind(struct device *dev, struct device *master, void *data)
 	hdmi_cfg->qfprom_mmio_name = "qfprom_physical";
 
 	for (i = 0; i < HDMI_MAX_NUM_GPIO; i++) {
-		hdmi_cfg->gpios[i].num = msm_hdmi_get_gpio(of_node,
-						msm_hdmi_gpio_pdata[i].name);
+		const char *name = msm_hdmi_gpio_pdata[i].name;
+		struct gpio_desc *gpiod;
+
+		/*
+		 * We are fetching the GPIO lines "as is" since the connector
+		 * code is enabling and disabling the lines. Until that point
+		 * the power-on default value will be kept.
+		 */
+		gpiod = devm_gpiod_get_optional(dev, name, GPIOD_ASIS);
+		/* This will catch e.g. -PROBE_DEFER */
+		if (IS_ERR(gpiod))
+			return PTR_ERR(gpiod);
+		if (!gpiod) {
+			/* Try a second time, stripping down the name */
+			char name3[32];
+
+			/*
+			 * Try again after stripping out the "qcom,hdmi-tx"
+			 * prefix. This is mainly to match "hpd-gpios" used
+			 * in the upstream bindings.
+			 */
+			if (sscanf(name, "qcom,hdmi-tx-%s", name3))
+				gpiod = devm_gpiod_get_optional(dev, name3, GPIOD_ASIS);
+			if (IS_ERR(gpiod))
+				return PTR_ERR(gpiod);
+			if (!gpiod)
+				DBG("failed to get gpio: %s", name);
+		}
+		hdmi_cfg->gpios[i].gpiod = gpiod;
+		if (gpiod)
+			gpiod_set_consumer_name(gpiod, msm_hdmi_gpio_pdata[i].label);
 		hdmi_cfg->gpios[i].output = msm_hdmi_gpio_pdata[i].output;
 		hdmi_cfg->gpios[i].value = msm_hdmi_gpio_pdata[i].value;
-		hdmi_cfg->gpios[i].label = msm_hdmi_gpio_pdata[i].label;
 	}
 
 	dev->platform_data = hdmi_cfg;
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.h b/drivers/gpu/drm/msm/hdmi/hdmi.h
index 982865866a29..bdac452b00fb 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.h
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.h
@@ -11,6 +11,7 @@
 #include <linux/clk.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
+#include <linux/gpio/consumer.h>
 #include <linux/hdmi.h>
 
 #include "msm_drv.h"
@@ -22,10 +23,9 @@ struct hdmi_phy;
 struct hdmi_platform_config;
 
 struct hdmi_gpio_data {
-	int num;
+	struct gpio_desc *gpiod;
 	bool output;
 	int value;
-	const char *label;
 };
 
 struct hdmi_audio {
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c b/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c
index c8dbd82854c2..ba81338a9bf8 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c
@@ -4,6 +4,8 @@
  * Author: Rob Clark <robdclark@gmail.com>
  */
 
+#include <linux/delay.h>
+
 #include "hdmi.h"
 
 struct hdmi_bridge {
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
index 07b4cb877d82..839822d894d0 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
@@ -4,7 +4,8 @@
  * Author: Rob Clark <robdclark@gmail.com>
  */
 
-#include <linux/gpio.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
 #include <linux/pinctrl/consumer.h>
 
 #include "msm_kms.h"
@@ -68,30 +69,21 @@ static void msm_hdmi_phy_reset(struct hdmi *hdmi)
 
 static int gpio_config(struct hdmi *hdmi, bool on)
 {
-	struct device *dev = &hdmi->pdev->dev;
 	const struct hdmi_platform_config *config = hdmi->config;
-	int ret, i;
+	int i;
 
 	if (on) {
 		for (i = 0; i < HDMI_MAX_NUM_GPIO; i++) {
 			struct hdmi_gpio_data gpio = config->gpios[i];
 
-			if (gpio.num != -1) {
-				ret = gpio_request(gpio.num, gpio.label);
-				if (ret) {
-					DRM_DEV_ERROR(dev,
-						"'%s'(%d) gpio_request failed: %d\n",
-						gpio.label, gpio.num, ret);
-					goto err;
-				}
-
+			if (gpio.gpiod) {
 				if (gpio.output) {
-					gpio_direction_output(gpio.num,
-							      gpio.value);
+					gpiod_direction_output(gpio.gpiod,
+							       gpio.value);
 				} else {
-					gpio_direction_input(gpio.num);
-					gpio_set_value_cansleep(gpio.num,
-								gpio.value);
+					gpiod_direction_input(gpio.gpiod);
+					gpiod_set_value_cansleep(gpio.gpiod,
+								 gpio.value);
 				}
 			}
 		}
@@ -101,29 +93,20 @@ static int gpio_config(struct hdmi *hdmi, bool on)
 		for (i = 0; i < HDMI_MAX_NUM_GPIO; i++) {
 			struct hdmi_gpio_data gpio = config->gpios[i];
 
-			if (gpio.num == -1)
+			if (!gpio.gpiod)
 				continue;
 
 			if (gpio.output) {
 				int value = gpio.value ? 0 : 1;
 
-				gpio_set_value_cansleep(gpio.num, value);
+				gpiod_set_value_cansleep(gpio.gpiod, value);
 			}
-
-			gpio_free(gpio.num);
 		};
 
 		DBG("gpio off");
 	}
 
 	return 0;
-err:
-	while (i--) {
-		if (config->gpios[i].num != -1)
-			gpio_free(config->gpios[i].num);
-	}
-
-	return ret;
 }
 
 static void enable_hpd_clocks(struct hdmi *hdmi, bool enable)
@@ -311,7 +294,7 @@ static enum drm_connector_status detect_gpio(struct hdmi *hdmi)
 	const struct hdmi_platform_config *config = hdmi->config;
 	struct hdmi_gpio_data hpd_gpio = config->gpios[HPD_GPIO_INDEX];
 
-	return gpio_get_value(hpd_gpio.num) ?
+	return gpiod_get_value(hpd_gpio.gpiod) ?
 			connector_status_connected :
 			connector_status_disconnected;
 }
@@ -330,7 +313,7 @@ static enum drm_connector_status hdmi_connector_detect(
 	 * some platforms may not have hpd gpio. Rely only on the status
 	 * provided by REG_HDMI_HPD_INT_STATUS in this case.
 	 */
-	if (hpd_gpio.num == -1)
+	if (!hpd_gpio.gpiod)
 		return detect_reg(hdmi);
 
 	do {
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c
index fe82ad38aa7a..a8f3b2cbfdc5 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/clk-provider.h>
+#include <linux/delay.h>
 
 #include "hdmi.h"
 
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x60.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x60.c
index 1acc33ce9d52..95f2928cb2cb 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x60.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x60.c
@@ -4,6 +4,8 @@
  * Author: Rob Clark <robdclark@gmail.com>
  */
 
+#include <linux/delay.h>
+
 #include "hdmi.h"
 
 static void hdmi_phy_8x60_powerup(struct hdmi_phy *phy,
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c b/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c
index e24a11d91945..562dfac67792 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c
@@ -6,6 +6,8 @@
  */
 
 #include <linux/clk-provider.h>
+#include <linux/delay.h>
+
 #include "hdmi.h"
 
 struct hdmi_pll_8960 {
diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c
index 169d5f915e68..5ccfad794c6a 100644
--- a/drivers/gpu/drm/msm/msm_atomic.c
+++ b/drivers/gpu/drm/msm/msm_atomic.c
@@ -6,45 +6,137 @@
 
 #include <drm/drm_atomic_uapi.h>
 #include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_vblank.h>
 
+#include "msm_atomic_trace.h"
 #include "msm_drv.h"
 #include "msm_gem.h"
 #include "msm_kms.h"
 
-static void msm_atomic_wait_for_commit_done(struct drm_device *dev,
-		struct drm_atomic_state *old_state)
+int msm_atomic_prepare_fb(struct drm_plane *plane,
+			  struct drm_plane_state *new_state)
 {
-	struct drm_crtc *crtc;
-	struct drm_crtc_state *new_crtc_state;
-	struct msm_drm_private *priv = old_state->dev->dev_private;
+	struct msm_drm_private *priv = plane->dev->dev_private;
 	struct msm_kms *kms = priv->kms;
-	int i;
 
-	for_each_new_crtc_in_state(old_state, crtc, new_crtc_state, i) {
-		if (!new_crtc_state->active)
-			continue;
+	if (!new_state->fb)
+		return 0;
 
-		if (drm_crtc_vblank_get(crtc))
-			continue;
+	drm_gem_fb_prepare_fb(plane, new_state);
 
-		kms->funcs->wait_for_crtc_commit_done(kms, crtc);
+	return msm_framebuffer_prepare(new_state->fb, kms->aspace);
+}
 
-		drm_crtc_vblank_put(crtc);
+static void msm_atomic_async_commit(struct msm_kms *kms, int crtc_idx)
+{
+	unsigned crtc_mask = BIT(crtc_idx);
+
+	trace_msm_atomic_async_commit_start(crtc_mask);
+
+	mutex_lock(&kms->commit_lock);
+
+	if (!(kms->pending_crtc_mask & crtc_mask)) {
+		mutex_unlock(&kms->commit_lock);
+		goto out;
 	}
+
+	kms->pending_crtc_mask &= ~crtc_mask;
+
+	kms->funcs->enable_commit(kms);
+
+	/*
+	 * Flush hardware updates:
+	 */
+	trace_msm_atomic_flush_commit(crtc_mask);
+	kms->funcs->flush_commit(kms, crtc_mask);
+	mutex_unlock(&kms->commit_lock);
+
+	/*
+	 * Wait for flush to complete:
+	 */
+	trace_msm_atomic_wait_flush_start(crtc_mask);
+	kms->funcs->wait_flush(kms, crtc_mask);
+	trace_msm_atomic_wait_flush_finish(crtc_mask);
+
+	mutex_lock(&kms->commit_lock);
+	kms->funcs->complete_commit(kms, crtc_mask);
+	mutex_unlock(&kms->commit_lock);
+	kms->funcs->disable_commit(kms);
+
+out:
+	trace_msm_atomic_async_commit_finish(crtc_mask);
 }
 
-int msm_atomic_prepare_fb(struct drm_plane *plane,
-			  struct drm_plane_state *new_state)
+static enum hrtimer_restart msm_atomic_pending_timer(struct hrtimer *t)
 {
-	struct msm_drm_private *priv = plane->dev->dev_private;
-	struct msm_kms *kms = priv->kms;
+	struct msm_pending_timer *timer = container_of(t,
+			struct msm_pending_timer, timer);
+	struct msm_drm_private *priv = timer->kms->dev->dev_private;
 
-	if (!new_state->fb)
-		return 0;
+	queue_work(priv->wq, &timer->work);
 
-	drm_gem_fb_prepare_fb(plane, new_state);
+	return HRTIMER_NORESTART;
+}
 
-	return msm_framebuffer_prepare(new_state->fb, kms->aspace);
+static void msm_atomic_pending_work(struct work_struct *work)
+{
+	struct msm_pending_timer *timer = container_of(work,
+			struct msm_pending_timer, work);
+
+	msm_atomic_async_commit(timer->kms, timer->crtc_idx);
+}
+
+void msm_atomic_init_pending_timer(struct msm_pending_timer *timer,
+		struct msm_kms *kms, int crtc_idx)
+{
+	timer->kms = kms;
+	timer->crtc_idx = crtc_idx;
+	hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	timer->timer.function = msm_atomic_pending_timer;
+	INIT_WORK(&timer->work, msm_atomic_pending_work);
+}
+
+static bool can_do_async(struct drm_atomic_state *state,
+		struct drm_crtc **async_crtc)
+{
+	struct drm_connector_state *connector_state;
+	struct drm_connector *connector;
+	struct drm_crtc_state *crtc_state;
+	struct drm_crtc *crtc;
+	int i, num_crtcs = 0;
+
+	if (!(state->legacy_cursor_update || state->async_update))
+		return false;
+
+	/* any connector change, means slow path: */
+	for_each_new_connector_in_state(state, connector, connector_state, i)
+		return false;
+
+	for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
+		if (drm_atomic_crtc_needs_modeset(crtc_state))
+			return false;
+		if (++num_crtcs > 1)
+			return false;
+		*async_crtc = crtc;
+	}
+
+	return true;
+}
+
+/* Get bitmask of crtcs that will need to be flushed.  The bitmask
+ * can be used with for_each_crtc_mask() iterator, to iterate
+ * effected crtcs without needing to preserve the atomic state.
+ */
+static unsigned get_crtc_mask(struct drm_atomic_state *state)
+{
+	struct drm_crtc_state *crtc_state;
+	struct drm_crtc *crtc;
+	unsigned i, mask = 0;
+
+	for_each_new_crtc_in_state(state, crtc, crtc_state, i)
+		mask |= drm_crtc_mask(crtc);
+
+	return mask;
 }
 
 void msm_atomic_commit_tail(struct drm_atomic_state *state)
@@ -52,26 +144,104 @@ void msm_atomic_commit_tail(struct drm_atomic_state *state)
 	struct drm_device *dev = state->dev;
 	struct msm_drm_private *priv = dev->dev_private;
 	struct msm_kms *kms = priv->kms;
+	struct drm_crtc *async_crtc = NULL;
+	unsigned crtc_mask = get_crtc_mask(state);
+	bool async = kms->funcs->vsync_time &&
+			can_do_async(state, &async_crtc);
+
+	trace_msm_atomic_commit_tail_start(async, crtc_mask);
+
+	kms->funcs->enable_commit(kms);
 
+	/*
+	 * Ensure any previous (potentially async) commit has
+	 * completed:
+	 */
+	trace_msm_atomic_wait_flush_start(crtc_mask);
+	kms->funcs->wait_flush(kms, crtc_mask);
+	trace_msm_atomic_wait_flush_finish(crtc_mask);
+
+	mutex_lock(&kms->commit_lock);
+
+	/*
+	 * Now that there is no in-progress flush, prepare the
+	 * current update:
+	 */
 	kms->funcs->prepare_commit(kms, state);
 
+	/*
+	 * Push atomic updates down to hardware:
+	 */
 	drm_atomic_helper_commit_modeset_disables(dev, state);
-
 	drm_atomic_helper_commit_planes(dev, state, 0);
-
 	drm_atomic_helper_commit_modeset_enables(dev, state);
 
-	if (kms->funcs->commit) {
-		DRM_DEBUG_ATOMIC("triggering commit\n");
-		kms->funcs->commit(kms, state);
-	}
+	if (async) {
+		struct msm_pending_timer *timer =
+			&kms->pending_timers[drm_crtc_index(async_crtc)];
 
-	if (!state->legacy_cursor_update)
-		msm_atomic_wait_for_commit_done(dev, state);
+		/* async updates are limited to single-crtc updates: */
+		WARN_ON(crtc_mask != drm_crtc_mask(async_crtc));
 
-	kms->funcs->complete_commit(kms, state);
+		/*
+		 * Start timer if we don't already have an update pending
+		 * on this crtc:
+		 */
+		if (!(kms->pending_crtc_mask & crtc_mask)) {
+			ktime_t vsync_time, wakeup_time;
 
-	drm_atomic_helper_commit_hw_done(state);
+			kms->pending_crtc_mask |= crtc_mask;
+
+			vsync_time = kms->funcs->vsync_time(kms, async_crtc);
+			wakeup_time = ktime_sub(vsync_time, ms_to_ktime(1));
+
+			hrtimer_start(&timer->timer, wakeup_time,
+					HRTIMER_MODE_ABS);
+		}
 
+		kms->funcs->disable_commit(kms);
+		mutex_unlock(&kms->commit_lock);
+
+		/*
+		 * At this point, from drm core's perspective, we
+		 * are done with the atomic update, so we can just
+		 * go ahead and signal that it is done:
+		 */
+		drm_atomic_helper_commit_hw_done(state);
+		drm_atomic_helper_cleanup_planes(dev, state);
+
+		trace_msm_atomic_commit_tail_finish(async, crtc_mask);
+
+		return;
+	}
+
+	/*
+	 * If there is any async flush pending on updated crtcs, fold
+	 * them into the current flush.
+	 */
+	kms->pending_crtc_mask &= ~crtc_mask;
+
+	/*
+	 * Flush hardware updates:
+	 */
+	trace_msm_atomic_flush_commit(crtc_mask);
+	kms->funcs->flush_commit(kms, crtc_mask);
+	mutex_unlock(&kms->commit_lock);
+
+	/*
+	 * Wait for flush to complete:
+	 */
+	trace_msm_atomic_wait_flush_start(crtc_mask);
+	kms->funcs->wait_flush(kms, crtc_mask);
+	trace_msm_atomic_wait_flush_finish(crtc_mask);
+
+	mutex_lock(&kms->commit_lock);
+	kms->funcs->complete_commit(kms, crtc_mask);
+	mutex_unlock(&kms->commit_lock);
+	kms->funcs->disable_commit(kms);
+
+	drm_atomic_helper_commit_hw_done(state);
 	drm_atomic_helper_cleanup_planes(dev, state);
+
+	trace_msm_atomic_commit_tail_finish(async, crtc_mask);
 }
diff --git a/drivers/gpu/drm/msm/msm_atomic_trace.h b/drivers/gpu/drm/msm/msm_atomic_trace.h
new file mode 100644
index 000000000000..b4ca0ed3b4a3
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_atomic_trace.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_MSM_GPU_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MSM_GPU_TRACE_H_
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM drm_msm_atomic
+#define TRACE_INCLUDE_FILE msm_atomic_trace
+
+TRACE_EVENT(msm_atomic_commit_tail_start,
+	    TP_PROTO(bool async, unsigned crtc_mask),
+	    TP_ARGS(async, crtc_mask),
+	    TP_STRUCT__entry(
+		    __field(bool, async)
+		    __field(u32, crtc_mask)
+		    ),
+	    TP_fast_assign(
+		    __entry->async = async;
+		    __entry->crtc_mask = crtc_mask;
+		    ),
+	    TP_printk("async=%d crtc_mask=%x",
+		    __entry->async, __entry->crtc_mask)
+);
+
+TRACE_EVENT(msm_atomic_commit_tail_finish,
+	    TP_PROTO(bool async, unsigned crtc_mask),
+	    TP_ARGS(async, crtc_mask),
+	    TP_STRUCT__entry(
+		    __field(bool, async)
+		    __field(u32, crtc_mask)
+		    ),
+	    TP_fast_assign(
+		    __entry->async = async;
+		    __entry->crtc_mask = crtc_mask;
+		    ),
+	    TP_printk("async=%d crtc_mask=%x",
+		    __entry->async, __entry->crtc_mask)
+);
+
+TRACE_EVENT(msm_atomic_async_commit_start,
+	    TP_PROTO(unsigned crtc_mask),
+	    TP_ARGS(crtc_mask),
+	    TP_STRUCT__entry(
+		    __field(u32, crtc_mask)
+		    ),
+	    TP_fast_assign(
+		    __entry->crtc_mask = crtc_mask;
+		    ),
+	    TP_printk("crtc_mask=%x",
+		    __entry->crtc_mask)
+);
+
+TRACE_EVENT(msm_atomic_async_commit_finish,
+	    TP_PROTO(unsigned crtc_mask),
+	    TP_ARGS(crtc_mask),
+	    TP_STRUCT__entry(
+		    __field(u32, crtc_mask)
+		    ),
+	    TP_fast_assign(
+		    __entry->crtc_mask = crtc_mask;
+		    ),
+	    TP_printk("crtc_mask=%x",
+		    __entry->crtc_mask)
+);
+
+TRACE_EVENT(msm_atomic_wait_flush_start,
+	    TP_PROTO(unsigned crtc_mask),
+	    TP_ARGS(crtc_mask),
+	    TP_STRUCT__entry(
+		    __field(u32, crtc_mask)
+		    ),
+	    TP_fast_assign(
+		    __entry->crtc_mask = crtc_mask;
+		    ),
+	    TP_printk("crtc_mask=%x",
+		    __entry->crtc_mask)
+);
+
+TRACE_EVENT(msm_atomic_wait_flush_finish,
+	    TP_PROTO(unsigned crtc_mask),
+	    TP_ARGS(crtc_mask),
+	    TP_STRUCT__entry(
+		    __field(u32, crtc_mask)
+		    ),
+	    TP_fast_assign(
+		    __entry->crtc_mask = crtc_mask;
+		    ),
+	    TP_printk("crtc_mask=%x",
+		    __entry->crtc_mask)
+);
+
+TRACE_EVENT(msm_atomic_flush_commit,
+	    TP_PROTO(unsigned crtc_mask),
+	    TP_ARGS(crtc_mask),
+	    TP_STRUCT__entry(
+		    __field(u32, crtc_mask)
+		    ),
+	    TP_fast_assign(
+		    __entry->crtc_mask = crtc_mask;
+		    ),
+	    TP_printk("crtc_mask=%x",
+		    __entry->crtc_mask)
+);
+
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/msm
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/msm/msm_atomic_tracepoints.c b/drivers/gpu/drm/msm/msm_atomic_tracepoints.c
new file mode 100644
index 000000000000..011dc881f391
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_atomic_tracepoints.c
@@ -0,0 +1,3 @@
+// SPDX-License-Identifier: GPL-2.0
+#define CREATE_TRACE_POINTS
+#include "msm_atomic_trace.h"
diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c
index a0a8df591e93..6be879578140 100644
--- a/drivers/gpu/drm/msm/msm_debugfs.c
+++ b/drivers/gpu/drm/msm/msm_debugfs.c
@@ -5,7 +5,12 @@
  */
 
 #ifdef CONFIG_DEBUG_FS
+
 #include <linux/debugfs.h>
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_file.h>
+
 #include "msm_drv.h"
 #include "msm_gpu.h"
 #include "msm_kms.h"
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index ee031c086805..c84f0a8b3f2c 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -5,9 +5,18 @@
  * Author: Rob Clark <robdclark@gmail.com>
  */
 
+#include <linux/dma-mapping.h>
 #include <linux/kthread.h>
+#include <linux/uaccess.h>
 #include <uapi/linux/sched/types.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_irq.h>
+#include <drm/drm_prime.h>
 #include <drm/drm_of.h>
+#include <drm/drm_vblank.h>
 
 #include "msm_drv.h"
 #include "msm_debugfs.h"
@@ -17,7 +26,6 @@
 #include "msm_kms.h"
 #include "adreno/adreno_gpu.h"
 
-
 /*
  * MSM driver version:
  * - 1.0.0 - initial interface
@@ -75,46 +83,6 @@ module_param(modeset, bool, 0600);
  * Util/helpers:
  */
 
-int msm_clk_bulk_get(struct device *dev, struct clk_bulk_data **bulk)
-{
-	struct property *prop;
-	const char *name;
-	struct clk_bulk_data *local;
-	int i = 0, ret, count;
-
-	count = of_property_count_strings(dev->of_node, "clock-names");
-	if (count < 1)
-		return 0;
-
-	local = devm_kcalloc(dev, sizeof(struct clk_bulk_data *),
-		count, GFP_KERNEL);
-	if (!local)
-		return -ENOMEM;
-
-	of_property_for_each_string(dev->of_node, "clock-names", prop, name) {
-		local[i].id = devm_kstrdup(dev, name, GFP_KERNEL);
-		if (!local[i].id) {
-			devm_kfree(dev, local);
-			return -ENOMEM;
-		}
-
-		i++;
-	}
-
-	ret = devm_clk_bulk_get(dev, count, local);
-
-	if (ret) {
-		for (i = 0; i < count; i++)
-			devm_kfree(dev, (void *) local[i].id);
-		devm_kfree(dev, local);
-
-		return ret;
-	}
-
-	*bulk = local;
-	return count;
-}
-
 struct clk *msm_clk_bulk_get_clock(struct clk_bulk_data *bulk, int count,
 		const char *name)
 {
@@ -505,6 +473,7 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv)
 	ddev->mode_config.normalize_zpos = true;
 
 	if (kms) {
+		kms->dev = ddev;
 		ret = kms->funcs->hw_init(kms);
 		if (ret) {
 			DRM_DEV_ERROR(dev, "kms hw init failed: %d\n", ret);
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index ee7b512dc158..71547e756e29 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -25,7 +25,6 @@
 #include <linux/sizes.h>
 #include <linux/kthread.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_plane_helper.h>
@@ -222,8 +221,12 @@ struct msm_format {
 	uint32_t pixel_format;
 };
 
+struct msm_pending_timer;
+
 int msm_atomic_prepare_fb(struct drm_plane *plane,
 			  struct drm_plane_state *new_state);
+void msm_atomic_init_pending_timer(struct msm_pending_timer *timer,
+		struct msm_kms *kms, int crtc_idx);
 void msm_atomic_commit_tail(struct drm_atomic_state *state);
 struct drm_atomic_state *msm_atomic_state_alloc(struct drm_device *dev);
 void msm_atomic_state_clear(struct drm_atomic_state *state);
@@ -399,7 +402,6 @@ static inline void msm_perf_debugfs_cleanup(struct msm_drm_private *priv) {}
 #endif
 
 struct clk *msm_clk_get(struct platform_device *pdev, const char *name);
-int msm_clk_bulk_get(struct device *dev, struct clk_bulk_data **bulk);
 
 struct clk *msm_clk_bulk_get_clock(struct clk_bulk_data *bulk, int count,
 	const char *name);
diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c
index 5bcd5e502a6b..37674e886e99 100644
--- a/drivers/gpu/drm/msm/msm_fb.c
+++ b/drivers/gpu/drm/msm/msm_fb.c
@@ -6,6 +6,8 @@
 
 #include <drm/drm_crtc.h>
 #include <drm/drm_damage_helper.h>
+#include <drm/drm_file.h>
+#include <drm/drm_fourcc.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_probe_helper.h>
 
diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c
index 2429d5e6ce9f..cff198b2f470 100644
--- a/drivers/gpu/drm/msm/msm_fbdev.c
+++ b/drivers/gpu/drm/msm/msm_fbdev.c
@@ -6,6 +6,7 @@
 
 #include <drm/drm_crtc.h>
 #include <drm/drm_fb_helper.h>
+#include <drm/drm_fourcc.h>
 
 #include "msm_drv.h"
 #include "msm_kms.h"
@@ -169,6 +170,9 @@ struct drm_fb_helper *msm_fbdev_init(struct drm_device *dev)
 	if (ret)
 		goto fini;
 
+	/* the fw fb could be anywhere in memory */
+	drm_fb_helper_remove_conflicting_framebuffers(NULL, "msm", false);
+
 	ret = drm_fb_helper_initial_config(helper, 32);
 	if (ret)
 		goto fini;
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 0c2a1252c8be..5a6a79fbc9d6 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -9,6 +9,8 @@
 #include <linux/dma-buf.h>
 #include <linux/pfn_t.h>
 
+#include <drm/drm_prime.h>
+
 #include "msm_drv.h"
 #include "msm_fence.h"
 #include "msm_gem.h"
@@ -50,7 +52,7 @@ static void sync_for_device(struct msm_gem_object *msm_obj)
 {
 	struct device *dev = msm_obj->base.dev->dev;
 
-	if (get_dma_ops(dev)) {
+	if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) {
 		dma_sync_sg_for_device(dev, msm_obj->sgt->sgl,
 			msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
 	} else {
@@ -63,7 +65,7 @@ static void sync_for_cpu(struct msm_gem_object *msm_obj)
 {
 	struct device *dev = msm_obj->base.dev->dev;
 
-	if (get_dma_ops(dev)) {
+	if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) {
 		dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl,
 			msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
 	} else {
diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c
index 5d64e0671f7a..d7c8948427fe 100644
--- a/drivers/gpu/drm/msm/msm_gem_prime.c
+++ b/drivers/gpu/drm/msm/msm_gem_prime.c
@@ -4,11 +4,13 @@
  * Author: Rob Clark <robdclark@gmail.com>
  */
 
+#include <linux/dma-buf.h>
+
+#include <drm/drm_prime.h>
+
 #include "msm_drv.h"
 #include "msm_gem.h"
 
-#include <linux/dma-buf.h>
-
 struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj)
 {
 	struct msm_gem_object *msm_obj = to_msm_bo(obj);
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 2e1556b7af26..be5327af16fa 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -4,7 +4,11 @@
  * Author: Rob Clark <robdclark@gmail.com>
  */
 
+#include <linux/file.h>
 #include <linux/sync_file.h>
+#include <linux/uaccess.h>
+
+#include <drm/drm_file.h>
 
 #include "msm_drv.h"
 #include "msm_gpu.h"
@@ -26,8 +30,8 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
 		uint32_t nr_cmds)
 {
 	struct msm_gem_submit *submit;
-	uint64_t sz = sizeof(*submit) + ((u64)nr_bos * sizeof(submit->bos[0])) +
-		((u64)nr_cmds * sizeof(submit->cmd[0]));
+	uint64_t sz = struct_size(submit, bos, nr_bos) +
+				  ((u64)nr_cmds * sizeof(submit->cmd[0]));
 
 	if (sz > SIZE_MAX)
 		return NULL;
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index f7308d68c5ed..a052364a5d74 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -784,7 +784,7 @@ static irqreturn_t irq_handler(int irq, void *data)
 
 static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
 {
-	int ret = msm_clk_bulk_get(&pdev->dev, &gpu->grp_clks);
+	int ret = devm_clk_bulk_get_all(&pdev->dev, &gpu->grp_clks);
 
 	if (ret < 1) {
 		gpu->nr_clocks = 0;
diff --git a/drivers/gpu/drm/msm/msm_gpu_trace.h b/drivers/gpu/drm/msm/msm_gpu_trace.h
index 1155118a27a1..122b84789238 100644
--- a/drivers/gpu/drm/msm/msm_gpu_trace.h
+++ b/drivers/gpu/drm/msm/msm_gpu_trace.h
@@ -5,7 +5,7 @@
 #include <linux/tracepoint.h>
 
 #undef TRACE_SYSTEM
-#define TRACE_SYSTEM drm_msm
+#define TRACE_SYSTEM drm_msm_gpu
 #define TRACE_INCLUDE_FILE msm_gpu_trace
 
 TRACE_EVENT(msm_gpu_submit,
diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c
index 27312b553dd8..34f643a0c28a 100644
--- a/drivers/gpu/drm/msm/msm_gpummu.c
+++ b/drivers/gpu/drm/msm/msm_gpummu.c
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2018 The Linux Foundation. All rights reserved. */
 
+#include <linux/dma-mapping.h>
+
 #include "msm_drv.h"
 #include "msm_mmu.h"
 #include "adreno/adreno_gpu.h"
diff --git a/drivers/gpu/drm/msm/msm_kms.h b/drivers/gpu/drm/msm/msm_kms.h
index c7588a42635e..1cbef6b200b7 100644
--- a/drivers/gpu/drm/msm/msm_kms.h
+++ b/drivers/gpu/drm/msm/msm_kms.h
@@ -30,13 +30,76 @@ struct msm_kms_funcs {
 	irqreturn_t (*irq)(struct msm_kms *kms);
 	int (*enable_vblank)(struct msm_kms *kms, struct drm_crtc *crtc);
 	void (*disable_vblank)(struct msm_kms *kms, struct drm_crtc *crtc);
-	/* modeset, bracketing atomic_commit(): */
+
+	/*
+	 * Atomic commit handling:
+	 *
+	 * Note that in the case of async commits, the funcs which take
+	 * a crtc_mask (ie. ->flush_commit(), and ->complete_commit())
+	 * might not be evenly balanced with ->prepare_commit(), however
+	 * each crtc that effected by a ->prepare_commit() (potentially
+	 * multiple times) will eventually (at end of vsync period) be
+	 * flushed and completed.
+	 *
+	 * This has some implications about tracking of cleanup state,
+	 * for example SMP blocks to release after commit completes.  Ie.
+	 * cleanup state should be also duplicated in the various
+	 * duplicate_state() methods, as the current cleanup state at
+	 * ->complete_commit() time may have accumulated cleanup work
+	 * from multiple commits.
+	 */
+
+	/**
+	 * Enable/disable power/clks needed for hw access done in other
+	 * commit related methods.
+	 *
+	 * If mdp4 is migrated to runpm, we could probably drop these
+	 * and use runpm directly.
+	 */
+	void (*enable_commit)(struct msm_kms *kms);
+	void (*disable_commit)(struct msm_kms *kms);
+
+	/**
+	 * If the kms backend supports async commit, it should implement
+	 * this method to return the time of the next vsync.  This is
+	 * used to determine a time slightly before vsync, for the async
+	 * commit timer to run and complete an async commit.
+	 */
+	ktime_t (*vsync_time)(struct msm_kms *kms, struct drm_crtc *crtc);
+
+	/**
+	 * Prepare for atomic commit.  This is called after any previous
+	 * (async or otherwise) commit has completed.
+	 */
 	void (*prepare_commit)(struct msm_kms *kms, struct drm_atomic_state *state);
-	void (*commit)(struct msm_kms *kms, struct drm_atomic_state *state);
-	void (*complete_commit)(struct msm_kms *kms, struct drm_atomic_state *state);
-	/* functions to wait for atomic commit completed on each CRTC */
-	void (*wait_for_crtc_commit_done)(struct msm_kms *kms,
-					struct drm_crtc *crtc);
+
+	/**
+	 * Flush an atomic commit.  This is called after the hardware
+	 * updates have already been pushed down to effected planes/
+	 * crtcs/encoders/connectors.
+	 */
+	void (*flush_commit)(struct msm_kms *kms, unsigned crtc_mask);
+
+	/**
+	 * Wait for any in-progress flush to complete on the specified
+	 * crtcs.  This should not block if there is no in-progress
+	 * commit (ie. don't just wait for a vblank), as it will also
+	 * be called before ->prepare_commit() to ensure any potential
+	 * "async" commit has completed.
+	 */
+	void (*wait_flush)(struct msm_kms *kms, unsigned crtc_mask);
+
+	/**
+	 * Clean up after commit is completed.  This is called after
+	 * ->wait_flush(), to give the backend a chance to do any
+	 * post-commit cleanup.
+	 */
+	void (*complete_commit)(struct msm_kms *kms, unsigned crtc_mask);
+
+	/*
+	 * Format handling:
+	 */
+
 	/* get msm_format w/ optional format modifiers from drm_mode_fb_cmd2 */
 	const struct msm_format *(*get_format)(struct msm_kms *kms,
 					const uint32_t format,
@@ -46,6 +109,7 @@ struct msm_kms_funcs {
 			const struct msm_format *msm_fmt,
 			const struct drm_mode_fb_cmd2 *cmd,
 			struct drm_gem_object **bos);
+
 	/* misc: */
 	long (*round_pixclk)(struct msm_kms *kms, unsigned long rate,
 			struct drm_encoder *encoder);
@@ -64,20 +128,48 @@ struct msm_kms_funcs {
 #endif
 };
 
+struct msm_kms;
+
+/*
+ * A per-crtc timer for pending async atomic flushes.  Scheduled to expire
+ * shortly before vblank to flush pending async updates.
+ */
+struct msm_pending_timer {
+	struct hrtimer timer;
+	struct work_struct work;
+	struct msm_kms *kms;
+	unsigned crtc_idx;
+};
+
 struct msm_kms {
 	const struct msm_kms_funcs *funcs;
+	struct drm_device *dev;
 
 	/* irq number to be passed on to drm_irq_install */
 	int irq;
 
 	/* mapper-id used to request GEM buffer mapped for scanout: */
 	struct msm_gem_address_space *aspace;
+
+	/*
+	 * For async commit, where ->flush_commit() and later happens
+	 * from the crtc's pending_timer close to end of the frame:
+	 */
+	struct mutex commit_lock;
+	unsigned pending_crtc_mask;
+	struct msm_pending_timer pending_timers[MAX_CRTCS];
 };
 
 static inline void msm_kms_init(struct msm_kms *kms,
 		const struct msm_kms_funcs *funcs)
 {
+	unsigned i;
+
+	mutex_init(&kms->commit_lock);
 	kms->funcs = funcs;
+
+	for (i = 0; i < ARRAY_SIZE(kms->pending_timers); i++)
+		msm_atomic_init_pending_timer(&kms->pending_timers[i], kms, i);
 }
 
 struct msm_kms *mdp4_kms_init(struct drm_device *dev);
@@ -98,4 +190,8 @@ struct msm_mdss {
 int mdp5_mdss_init(struct drm_device *dev);
 int dpu_mdss_init(struct drm_device *dev);
 
+#define for_each_crtc_mask(dev, crtc, crtc_mask) \
+	drm_for_each_crtc(crtc, dev) \
+		for_each_if (drm_crtc_mask(crtc) & (crtc_mask))
+
 #endif /* __MSM_KMS_H__ */
diff --git a/drivers/gpu/drm/msm/msm_perf.c b/drivers/gpu/drm/msm/msm_perf.c
index 490cadda2796..3a27153eef08 100644
--- a/drivers/gpu/drm/msm/msm_perf.c
+++ b/drivers/gpu/drm/msm/msm_perf.c
@@ -15,6 +15,9 @@
 #ifdef CONFIG_DEBUG_FS
 
 #include <linux/debugfs.h>
+#include <linux/uaccess.h>
+
+#include <drm/drm_file.h>
 
 #include "msm_drv.h"
 #include "msm_gpu.h"
diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index 76d3fdd17bf8..c7832a951039 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -31,11 +31,14 @@
 
 #ifdef CONFIG_DEBUG_FS
 
-#include <linux/kfifo.h>
-#include <linux/debugfs.h>
 #include <linux/circ_buf.h>
+#include <linux/debugfs.h>
+#include <linux/kfifo.h>
+#include <linux/uaccess.h>
 #include <linux/wait.h>
 
+#include <drm/drm_file.h>
+
 #include "msm_drv.h"
 #include "msm_gpu.h"
 #include "msm_gem.h"
diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c
index c70e00e22c4c..001fbf537440 100644
--- a/drivers/gpu/drm/msm/msm_submitqueue.c
+++ b/drivers/gpu/drm/msm/msm_submitqueue.c
@@ -3,6 +3,8 @@
  */
 
 #include <linux/kref.h>
+#include <linux/uaccess.h>
+
 #include "msm_gpu.h"
 
 void msm_submitqueue_destroy(struct kref *kref)
diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
index 710d903f8e0d..a1f5fa6a742a 100644
--- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c
+++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
@@ -53,8 +53,10 @@ static int panfrost_devfreq_target(struct device *dev, unsigned long *freq,
 	if (err) {
 		dev_err(dev, "Cannot set frequency %lu (%d)\n", target_rate,
 			err);
-		regulator_set_voltage(pfdev->regulator, pfdev->devfreq.cur_volt,
-				      pfdev->devfreq.cur_volt);
+		if (pfdev->regulator)
+			regulator_set_voltage(pfdev->regulator,
+					      pfdev->devfreq.cur_volt,
+					      pfdev->devfreq.cur_volt);
 		return err;
 	}
 
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
index 4da71bb56c20..46b0b02e4289 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.c
+++ b/drivers/gpu/drm/panfrost/panfrost_device.c
@@ -5,7 +5,6 @@
 #include <linux/clk.h>
 #include <linux/reset.h>
 #include <linux/platform_device.h>
-#include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
 
 #include "panfrost_device.h"
@@ -125,7 +124,6 @@ int panfrost_device_init(struct panfrost_device *pfdev)
 	INIT_LIST_HEAD(&pfdev->scheduled_jobs);
 	INIT_LIST_HEAD(&pfdev->as_lru_list);
 
-	spin_lock_init(&pfdev->hwaccess_lock);
 	spin_lock_init(&pfdev->as_lock);
 
 	err = panfrost_clk_init(pfdev);
@@ -166,14 +164,6 @@ int panfrost_device_init(struct panfrost_device *pfdev)
 	if (err)
 		goto err_out4;
 
-	/* runtime PM will wake us up later */
-	panfrost_gpu_power_off(pfdev);
-
-	pm_runtime_set_active(pfdev->dev);
-	pm_runtime_get_sync(pfdev->dev);
-	pm_runtime_mark_last_busy(pfdev->dev);
-	pm_runtime_put_autosuspend(pfdev->dev);
-
 	err = panfrost_perfcnt_init(pfdev);
 	if (err)
 		goto err_out5;
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
index f503c566e99f..9c39b9794811 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.h
+++ b/drivers/gpu/drm/panfrost/panfrost_device.h
@@ -63,8 +63,6 @@ struct panfrost_device {
 	struct drm_device *ddev;
 	struct platform_device *pdev;
 
-	spinlock_t hwaccess_lock;
-
 	void __iomem *iomem;
 	struct clk *clock;
 	struct clk *bus_clock;
@@ -104,7 +102,6 @@ struct panfrost_device {
 struct panfrost_mmu {
 	struct io_pgtable_cfg pgtbl_cfg;
 	struct io_pgtable_ops *pgtbl_ops;
-	struct mutex lock;
 	int as;
 	atomic_t as_count;
 	struct list_head list;
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 44a558c6e17e..bc2ddeb55f5d 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -523,10 +523,6 @@ static int panfrost_probe(struct platform_device *pdev)
 	mutex_init(&pfdev->shrinker_lock);
 	INIT_LIST_HEAD(&pfdev->shrinker_list);
 
-	pm_runtime_use_autosuspend(pfdev->dev);
-	pm_runtime_set_autosuspend_delay(pfdev->dev, 50); /* ~3 frames */
-	pm_runtime_enable(pfdev->dev);
-
 	err = panfrost_device_init(pfdev);
 	if (err) {
 		if (err != -EPROBE_DEFER)
@@ -541,6 +537,12 @@ static int panfrost_probe(struct platform_device *pdev)
 		goto err_out1;
 	}
 
+	pm_runtime_set_active(pfdev->dev);
+	pm_runtime_mark_last_busy(pfdev->dev);
+	pm_runtime_enable(pfdev->dev);
+	pm_runtime_set_autosuspend_delay(pfdev->dev, 50); /* ~3 frames */
+	pm_runtime_use_autosuspend(pfdev->dev);
+
 	/*
 	 * Register the DRM device with the core and the connectors with
 	 * sysfs
@@ -570,11 +572,13 @@ static int panfrost_remove(struct platform_device *pdev)
 
 	drm_dev_unregister(ddev);
 	panfrost_gem_shrinker_cleanup(ddev);
+
 	pm_runtime_get_sync(pfdev->dev);
-	pm_runtime_put_sync_autosuspend(pfdev->dev);
-	pm_runtime_disable(pfdev->dev);
 	panfrost_devfreq_fini(pfdev);
 	panfrost_device_fini(pfdev);
+	pm_runtime_put_sync_suspend(pfdev->dev);
+	pm_runtime_disable(pfdev->dev);
+
 	drm_dev_put(ddev);
 	return 0;
 }
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
index d191632b6197..458f0fa68111 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
@@ -36,15 +36,18 @@ panfrost_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc
 	return count;
 }
 
-static void panfrost_gem_purge(struct drm_gem_object *obj)
+static bool panfrost_gem_purge(struct drm_gem_object *obj)
 {
 	struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
-	mutex_lock(&shmem->pages_lock);
+
+	if (!mutex_trylock(&shmem->pages_lock))
+		return false;
 
 	panfrost_mmu_unmap(to_panfrost_bo(obj));
 	drm_gem_shmem_purge_locked(obj);
 
 	mutex_unlock(&shmem->pages_lock);
+	return true;
 }
 
 static unsigned long
@@ -61,8 +64,8 @@ panfrost_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 	list_for_each_entry_safe(shmem, tmp, &pfdev->shrinker_list, madv_list) {
 		if (freed >= sc->nr_to_scan)
 			break;
-		if (drm_gem_shmem_is_purgeable(shmem)) {
-			panfrost_gem_purge(&shmem->base);
+		if (drm_gem_shmem_is_purgeable(shmem) &&
+		    panfrost_gem_purge(&shmem->base)) {
 			freed += shmem->base.size >> PAGE_SHIFT;
 			list_del_init(&shmem->madv_list);
 		}
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index 05c85f45a0de..a58551668d9a 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -141,7 +141,6 @@ static void panfrost_job_write_affinity(struct panfrost_device *pfdev,
 static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
 {
 	struct panfrost_device *pfdev = job->pfdev;
-	unsigned long flags;
 	u32 cfg;
 	u64 jc_head = job->jc;
 	int ret;
@@ -150,13 +149,14 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
 	if (ret < 0)
 		return;
 
-	if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js))))
-		goto end;
+	if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js)))) {
+		pm_runtime_put_sync_autosuspend(pfdev->dev);
+		return;
+	}
 
 	cfg = panfrost_mmu_as_get(pfdev, &job->file_priv->mmu);
 
 	panfrost_devfreq_record_transition(pfdev, js);
-	spin_lock_irqsave(&pfdev->hwaccess_lock, flags);
 
 	job_write(pfdev, JS_HEAD_NEXT_LO(js), jc_head & 0xFFFFFFFF);
 	job_write(pfdev, JS_HEAD_NEXT_HI(js), jc_head >> 32);
@@ -185,12 +185,6 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
 				job, js, jc_head);
 
 	job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
-
-	spin_unlock_irqrestore(&pfdev->hwaccess_lock, flags);
-
-end:
-	pm_runtime_mark_last_busy(pfdev->dev);
-	pm_runtime_put_autosuspend(pfdev->dev);
 }
 
 static void panfrost_acquire_object_fences(struct drm_gem_object **bos,
@@ -369,6 +363,7 @@ static void panfrost_job_timedout(struct drm_sched_job *sched_job)
 	struct panfrost_job *job = to_panfrost_job(sched_job);
 	struct panfrost_device *pfdev = job->pfdev;
 	int js = panfrost_job_get_slot(job);
+	unsigned long flags;
 	int i;
 
 	/*
@@ -394,6 +389,15 @@ static void panfrost_job_timedout(struct drm_sched_job *sched_job)
 	if (sched_job)
 		drm_sched_increase_karma(sched_job);
 
+	spin_lock_irqsave(&pfdev->js->job_lock, flags);
+	for (i = 0; i < NUM_JOB_SLOTS; i++) {
+		if (pfdev->jobs[i]) {
+			pm_runtime_put_noidle(pfdev->dev);
+			pfdev->jobs[i] = NULL;
+		}
+	}
+	spin_unlock_irqrestore(&pfdev->js->job_lock, flags);
+
 	/* panfrost_core_dump(pfdev); */
 
 	panfrost_devfreq_record_transition(pfdev, js);
@@ -450,12 +454,21 @@ static irqreturn_t panfrost_job_irq_handler(int irq, void *data)
 		}
 
 		if (status & JOB_INT_MASK_DONE(j)) {
-			struct panfrost_job *job = pfdev->jobs[j];
+			struct panfrost_job *job;
+
+			spin_lock(&pfdev->js->job_lock);
+			job = pfdev->jobs[j];
+			/* Only NULL if job timeout occurred */
+			if (job) {
+				pfdev->jobs[j] = NULL;
+
+				panfrost_mmu_as_put(pfdev, &job->file_priv->mmu);
+				panfrost_devfreq_record_transition(pfdev, j);
 
-			pfdev->jobs[j] = NULL;
-			panfrost_mmu_as_put(pfdev, &job->file_priv->mmu);
-			panfrost_devfreq_record_transition(pfdev, j);
-			dma_fence_signal(job->done_fence);
+				dma_fence_signal_locked(job->done_fence);
+				pm_runtime_put_autosuspend(pfdev->dev);
+			}
+			spin_unlock(&pfdev->js->job_lock);
 		}
 
 		status &= ~mask;
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index 842bdd7cf6be..387d830cb7cf 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -80,19 +80,11 @@ static void lock_region(struct panfrost_device *pfdev, u32 as_nr,
 }
 
 
-static int mmu_hw_do_operation(struct panfrost_device *pfdev,
-			       struct panfrost_mmu *mmu,
-			       u64 iova, size_t size, u32 op)
+static int mmu_hw_do_operation_locked(struct panfrost_device *pfdev, int as_nr,
+				      u64 iova, size_t size, u32 op)
 {
-	int ret, as_nr;
-
-	spin_lock(&pfdev->as_lock);
-	as_nr = mmu->as;
-
-	if (as_nr < 0) {
-		spin_unlock(&pfdev->as_lock);
+	if (as_nr < 0)
 		return 0;
-	}
 
 	if (op != AS_COMMAND_UNLOCK)
 		lock_region(pfdev, as_nr, iova, size);
@@ -101,10 +93,18 @@ static int mmu_hw_do_operation(struct panfrost_device *pfdev,
 	write_cmd(pfdev, as_nr, op);
 
 	/* Wait for the flush to complete */
-	ret = wait_ready(pfdev, as_nr);
+	return wait_ready(pfdev, as_nr);
+}
 
-	spin_unlock(&pfdev->as_lock);
+static int mmu_hw_do_operation(struct panfrost_device *pfdev,
+			       struct panfrost_mmu *mmu,
+			       u64 iova, size_t size, u32 op)
+{
+	int ret;
 
+	spin_lock(&pfdev->as_lock);
+	ret = mmu_hw_do_operation_locked(pfdev, mmu->as, iova, size, op);
+	spin_unlock(&pfdev->as_lock);
 	return ret;
 }
 
@@ -115,6 +115,8 @@ static void panfrost_mmu_enable(struct panfrost_device *pfdev, struct panfrost_m
 	u64 transtab = cfg->arm_mali_lpae_cfg.transtab;
 	u64 memattr = cfg->arm_mali_lpae_cfg.memattr;
 
+	mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0UL, AS_COMMAND_FLUSH_MEM);
+
 	mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), transtab & 0xffffffffUL);
 	mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), transtab >> 32);
 
@@ -127,8 +129,10 @@ static void panfrost_mmu_enable(struct panfrost_device *pfdev, struct panfrost_m
 	write_cmd(pfdev, as_nr, AS_COMMAND_UPDATE);
 }
 
-static void mmu_disable(struct panfrost_device *pfdev, u32 as_nr)
+static void panfrost_mmu_disable(struct panfrost_device *pfdev, u32 as_nr)
 {
+	mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0UL, AS_COMMAND_FLUSH_MEM);
+
 	mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), 0);
 	mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), 0);
 
@@ -220,6 +224,22 @@ static size_t get_pgsize(u64 addr, size_t size)
 	return SZ_2M;
 }
 
+void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
+			      struct panfrost_mmu *mmu,
+			      u64 iova, size_t size)
+{
+	if (mmu->as < 0)
+		return;
+
+	pm_runtime_get_noresume(pfdev->dev);
+
+	/* Flush the PTs only if we're already awake */
+	if (pm_runtime_active(pfdev->dev))
+		mmu_hw_do_operation(pfdev, mmu, iova, size, AS_COMMAND_FLUSH_PT);
+
+	pm_runtime_put_sync_autosuspend(pfdev->dev);
+}
+
 static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
 		      u64 iova, int prot, struct sg_table *sgt)
 {
@@ -228,8 +248,6 @@ static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
 	struct io_pgtable_ops *ops = mmu->pgtbl_ops;
 	u64 start_iova = iova;
 
-	mutex_lock(&mmu->lock);
-
 	for_each_sg(sgt->sgl, sgl, sgt->nents, count) {
 		unsigned long paddr = sg_dma_address(sgl);
 		size_t len = sg_dma_len(sgl);
@@ -246,10 +264,7 @@ static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
 		}
 	}
 
-	mmu_hw_do_operation(pfdev, mmu, start_iova, iova - start_iova,
-			    AS_COMMAND_FLUSH_PT);
-
-	mutex_unlock(&mmu->lock);
+	panfrost_mmu_flush_range(pfdev, mmu, start_iova, iova - start_iova);
 
 	return 0;
 }
@@ -259,7 +274,6 @@ int panfrost_mmu_map(struct panfrost_gem_object *bo)
 	struct drm_gem_object *obj = &bo->base.base;
 	struct panfrost_device *pfdev = to_panfrost_device(obj->dev);
 	struct sg_table *sgt;
-	int ret;
 	int prot = IOMMU_READ | IOMMU_WRITE;
 
 	if (WARN_ON(bo->is_mapped))
@@ -272,14 +286,7 @@ int panfrost_mmu_map(struct panfrost_gem_object *bo)
 	if (WARN_ON(IS_ERR(sgt)))
 		return PTR_ERR(sgt);
 
-	ret = pm_runtime_get_sync(pfdev->dev);
-	if (ret < 0)
-		return ret;
-
 	mmu_map_sg(pfdev, bo->mmu, bo->node.start << PAGE_SHIFT, prot, sgt);
-
-	pm_runtime_mark_last_busy(pfdev->dev);
-	pm_runtime_put_autosuspend(pfdev->dev);
 	bo->is_mapped = true;
 
 	return 0;
@@ -293,19 +300,12 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
 	u64 iova = bo->node.start << PAGE_SHIFT;
 	size_t len = bo->node.size << PAGE_SHIFT;
 	size_t unmapped_len = 0;
-	int ret;
 
 	if (WARN_ON(!bo->is_mapped))
 		return;
 
 	dev_dbg(pfdev->dev, "unmap: as=%d, iova=%llx, len=%zx", bo->mmu->as, iova, len);
 
-	ret = pm_runtime_get_sync(pfdev->dev);
-	if (ret < 0)
-		return;
-
-	mutex_lock(&bo->mmu->lock);
-
 	while (unmapped_len < len) {
 		size_t unmapped_page;
 		size_t pgsize = get_pgsize(iova, len - unmapped_len);
@@ -318,22 +318,12 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
 		unmapped_len += pgsize;
 	}
 
-	mmu_hw_do_operation(pfdev, bo->mmu, bo->node.start << PAGE_SHIFT,
-			    bo->node.size << PAGE_SHIFT, AS_COMMAND_FLUSH_PT);
-
-	mutex_unlock(&bo->mmu->lock);
-
-	pm_runtime_mark_last_busy(pfdev->dev);
-	pm_runtime_put_autosuspend(pfdev->dev);
+	panfrost_mmu_flush_range(pfdev, bo->mmu, bo->node.start << PAGE_SHIFT, len);
 	bo->is_mapped = false;
 }
 
 static void mmu_tlb_inv_context_s1(void *cookie)
-{
-	struct panfrost_file_priv *priv = cookie;
-
-	mmu_hw_do_operation(priv->pfdev, &priv->mmu, 0, ~0UL, AS_COMMAND_FLUSH_MEM);
-}
+{}
 
 static void mmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 				     size_t granule, bool leaf, void *cookie)
@@ -356,7 +346,6 @@ int panfrost_mmu_pgtable_alloc(struct panfrost_file_priv *priv)
 	struct panfrost_mmu *mmu = &priv->mmu;
 	struct panfrost_device *pfdev = priv->pfdev;
 
-	mutex_init(&mmu->lock);
 	INIT_LIST_HEAD(&mmu->list);
 	mmu->as = -1;
 
@@ -383,6 +372,11 @@ void panfrost_mmu_pgtable_free(struct panfrost_file_priv *priv)
 
 	spin_lock(&pfdev->as_lock);
 	if (mmu->as >= 0) {
+		pm_runtime_get_noresume(pfdev->dev);
+		if (pm_runtime_active(pfdev->dev))
+			panfrost_mmu_disable(pfdev, mmu->as);
+		pm_runtime_put_autosuspend(pfdev->dev);
+
 		clear_bit(mmu->as, &pfdev->as_alloc_mask);
 		clear_bit(mmu->as, &pfdev->as_in_use_mask);
 		list_del(&mmu->list);
@@ -627,5 +621,4 @@ int panfrost_mmu_init(struct panfrost_device *pfdev)
 void panfrost_mmu_fini(struct panfrost_device *pfdev)
 {
 	mmu_write(pfdev, MMU_INT_MASK, 0);
-	mmu_disable(pfdev, 0);
 }